From cdde031ac2c8124721655532ee6f4149e20e9c61 Mon Sep 17 00:00:00 2001
From: Dylan Baker
Date: Mon, 12 Apr 2021 11:26:45 -0700
Subject: [PATCH] classic/i965: Remove driver

Reviewed-by: Emma Anholt
Acked-by: Jason Ekstrand
Acked-by: Kenneth Graunke
Reviewed-by: Adam Jackson
Part-of:
---
 .gitlab-ci.yml | 4 -
 .gitlab-ci/meson/build.sh | 1 -
 include/pci_ids/i965_pci_ids.h | 260 -
 meson.build | 28 +-
 meson_options.txt | 16 +-
 src/intel/dev/intel_device_info.c | 4 +-
 src/intel/dev/intel_device_info_test.c | 7 +-
 src/loader/meson.build | 9 -
 src/loader/pci_id_driver_map.h | 7 -
 src/mesa/drivers/dri/i965/brw_batch.c | 1332 -----
 src/mesa/drivers/dri/i965/brw_batch.h | 166 -
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 307 -
 src/mesa/drivers/dri/i965/brw_blit.c | 790 ---
 src/mesa/drivers/dri/i965/brw_blit.h | 65 -
 src/mesa/drivers/dri/i965/brw_blorp.c | 1678 ------
 src/mesa/drivers/dri/i965/brw_blorp.h | 137 -
 src/mesa/drivers/dri/i965/brw_buffer_objects.c | 710 ---
 src/mesa/drivers/dri/i965/brw_buffer_objects.h | 141 -
 src/mesa/drivers/dri/i965/brw_buffers.c | 74 -
 src/mesa/drivers/dri/i965/brw_buffers.h | 35 -
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 1967 -------
 src/mesa/drivers/dri/i965/brw_bufmgr.h | 404 --
 src/mesa/drivers/dri/i965/brw_clear.c | 302 -
 src/mesa/drivers/dri/i965/brw_clip.c | 210 -
 src/mesa/drivers/dri/i965/brw_compute.c | 151 -
 src/mesa/drivers/dri/i965/brw_conditional_render.c | 193 -
 src/mesa/drivers/dri/i965/brw_context.c | 1975 -------
 src/mesa/drivers/dri/i965/brw_context.h | 1637 ------
 src/mesa/drivers/dri/i965/brw_copy_image.c | 139 -
 src/mesa/drivers/dri/i965/brw_cs.c | 220 -
 src/mesa/drivers/dri/i965/brw_cs.h | 46 -
 src/mesa/drivers/dri/i965/brw_curbe.c | 356 --
 src/mesa/drivers/dri/i965/brw_defines.h | 1668 ------
 src/mesa/drivers/dri/i965/brw_disk_cache.c | 417 --
 src/mesa/drivers/dri/i965/brw_draw.c | 1361 -----
 src/mesa/drivers/dri/i965/brw_draw.h | 87 -
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 801 ---
 src/mesa/drivers/dri/i965/brw_extensions.c | 404 --
 src/mesa/drivers/dri/i965/brw_fbo.c | 1139 ----
 src/mesa/drivers/dri/i965/brw_fbo.h | 255 -
 src/mesa/drivers/dri/i965/brw_ff_gs.c | 178 -
 src/mesa/drivers/dri/i965/brw_ff_gs.h | 42 -
 src/mesa/drivers/dri/i965/brw_formatquery.c | 119 -
 src/mesa/drivers/dri/i965/brw_generate_mipmap.c | 144 -
 src/mesa/drivers/dri/i965/brw_gs.c | 256 -
 src/mesa/drivers/dri/i965/brw_gs.h | 52 -
 src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 117 -
 src/mesa/drivers/dri/i965/brw_image.h | 122 -
 src/mesa/drivers/dri/i965/brw_link.cpp | 401 --
 src/mesa/drivers/dri/i965/brw_meta_util.c | 422 --
 src/mesa/drivers/dri/i965/brw_meta_util.h | 59 -
 src/mesa/drivers/dri/i965/brw_mipmap_tree.c | 3308 -----
 src/mesa/drivers/dri/i965/brw_mipmap_tree.h | 741 ---
 src/mesa/drivers/dri/i965/brw_misc_state.c | 728 ---
 src/mesa/drivers/dri/i965/brw_multisample_state.h | 111 -
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 450 --
 src/mesa/drivers/dri/i965/brw_object_purgeable.c | 187 -
 src/mesa/drivers/dri/i965/brw_performance_query.c | 533 --
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 454 --
 src/mesa/drivers/dri/i965/brw_pipe_control.h | 95 -
 src/mesa/drivers/dri/i965/brw_pixel.c | 133 -
 src/mesa/drivers/dri/i965/brw_pixel.h | 61 -
 src/mesa/drivers/dri/i965/brw_pixel_bitmap.c | 363 --
 src/mesa/drivers/dri/i965/brw_pixel_copy.c | 212 -
 src/mesa/drivers/dri/i965/brw_pixel_draw.c | 178 -
 src/mesa/drivers/dri/i965/brw_pixel_read.c | 300 -
 src/mesa/drivers/dri/i965/brw_primitive_restart.c | 462 --
 src/mesa/drivers/dri/i965/brw_program.c | 888 ---
 src/mesa/drivers/dri/i965/brw_program.h | 145 -
 src/mesa/drivers/dri/i965/brw_program_binary.c | 353 --
 src/mesa/drivers/dri/i965/brw_program_cache.c | 523 --
 src/mesa/drivers/dri/i965/brw_queryobj.c | 621 --
 src/mesa/drivers/dri/i965/brw_reset.c | 86 -
 src/mesa/drivers/dri/i965/brw_screen.c | 2886 ----
 src/mesa/drivers/dri/i965/brw_screen.h | 173 -
 src/mesa/drivers/dri/i965/brw_sf.c | 171 -
 src/mesa/drivers/dri/i965/brw_state.c | 119 -
 src/mesa/drivers/dri/i965/brw_state.h | 370 --
 src/mesa/drivers/dri/i965/brw_state_upload.c | 789 ---
 src/mesa/drivers/dri/i965/brw_structs.h | 68 -
 src/mesa/drivers/dri/i965/brw_surface_formats.c | 558 --
 src/mesa/drivers/dri/i965/brw_sync.c | 642 ---
 src/mesa/drivers/dri/i965/brw_tcs.c | 295 -
 src/mesa/drivers/dri/i965/brw_tcs_surface_state.c | 116 -
 src/mesa/drivers/dri/i965/brw_tes.c | 233 -
 src/mesa/drivers/dri/i965/brw_tes_surface_state.c | 116 -
 src/mesa/drivers/dri/i965/brw_tex.c | 415 --
 src/mesa/drivers/dri/i965/brw_tex.h | 58 -
 src/mesa/drivers/dri/i965/brw_tex_copy.c | 72 -
 src/mesa/drivers/dri/i965/brw_tex_image.c | 992 ----
 src/mesa/drivers/dri/i965/brw_tex_obj.h | 101 -
 src/mesa/drivers/dri/i965/brw_tex_validate.c | 223 -
 src/mesa/drivers/dri/i965/brw_upload.c | 134 -
 src/mesa/drivers/dri/i965/brw_urb.c | 268 -
 src/mesa/drivers/dri/i965/brw_util.c | 125 -
 src/mesa/drivers/dri/i965/brw_util.h | 74 -
 src/mesa/drivers/dri/i965/brw_vs.c | 369 --
 src/mesa/drivers/dri/i965/brw_vs.h | 57 -
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 119 -
 src/mesa/drivers/dri/i965/brw_wm.c | 639 --
 src/mesa/drivers/dri/i965/brw_wm.h | 59 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 1692 ------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 412 --
 src/mesa/drivers/dri/i965/genX_boilerplate.h | 160 -
 src/mesa/drivers/dri/i965/genX_pipe_control.c | 514 --
 src/mesa/drivers/dri/i965/genX_state_upload.c | 6088 --------
 src/mesa/drivers/dri/i965/gfx4_blorp_exec.h | 196 -
 src/mesa/drivers/dri/i965/gfx6_clip_state.c | 89 -
 src/mesa/drivers/dri/i965/gfx6_constant_state.c | 361 --
 src/mesa/drivers/dri/i965/gfx6_multisample_state.c | 61 -
 src/mesa/drivers/dri/i965/gfx6_queryobj.c | 560 --
 src/mesa/drivers/dri/i965/gfx6_sampler_state.c | 57 -
 src/mesa/drivers/dri/i965/gfx6_sol.c | 522 --
 src/mesa/drivers/dri/i965/gfx6_urb.c | 153 -
 src/mesa/drivers/dri/i965/gfx7_l3_state.c | 312 -
 src/mesa/drivers/dri/i965/gfx7_sol_state.c | 149 -
 src/mesa/drivers/dri/i965/gfx7_urb.c | 280 -
 src/mesa/drivers/dri/i965/gfx8_depth_state.c | 197 -
 src/mesa/drivers/dri/i965/gfx8_multisample_state.c | 55 -
 src/mesa/drivers/dri/i965/hsw_queryobj.c | 486 --
 src/mesa/drivers/dri/i965/hsw_sol.c | 264 -
 src/mesa/drivers/dri/i965/libdrm_macros.h | 59 -
 src/mesa/drivers/dri/i965/meson.build | 166 -
 src/mesa/drivers/dri/meson.build | 5 -
 124 files changed, 8 insertions(+), 55418 deletions(-)
 delete mode 100644 include/pci_ids/i965_pci_ids.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_batch.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_batch.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_binding_tables.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blit.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blit.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blorp.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blorp.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffer_objects.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffer_objects.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffers.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffers.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_bufmgr.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_bufmgr.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_clear.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_clip.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_compute.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_conditional_render.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_context.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_context.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_copy_image.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_curbe.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_defines.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_disk_cache.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_draw.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_draw.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_draw_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_extensions.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_fbo.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_fbo.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_formatquery.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_generate_mipmap.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_image.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_link.cpp
 delete mode 100644 src/mesa/drivers/dri/i965/brw_meta_util.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_meta_util.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_mipmap_tree.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_mipmap_tree.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_misc_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_multisample_state.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
 delete mode 100644 src/mesa/drivers/dri/i965/brw_object_purgeable.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_performance_query.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pipe_control.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pipe_control.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_bitmap.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_copy.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_draw.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_read.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_primitive_restart.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program_binary.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program_cache.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_queryobj.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_reset.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_screen.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_screen.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_sf.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_structs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_surface_formats.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_sync.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tcs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tcs_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tes.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tes_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_copy.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_image.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_obj.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_validate.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_urb.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_util.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_util.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/genX_blorp_exec.c
 delete mode 100644 src/mesa/drivers/dri/i965/genX_boilerplate.h
 delete mode 100644 src/mesa/drivers/dri/i965/genX_pipe_control.c
 delete mode 100644 src/mesa/drivers/dri/i965/genX_state_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx4_blorp_exec.h
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_clip_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_constant_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_multisample_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_queryobj.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_sampler_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_sol.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_urb.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx7_l3_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx7_sol_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx7_urb.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx8_depth_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx8_multisample_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/hsw_queryobj.c
 delete mode 100644 src/mesa/drivers/dri/i965/hsw_sol.c
 delete mode 100644 src/mesa/drivers/dri/i965/libdrm_macros.h
 delete mode 100644 src/mesa/drivers/dri/i965/meson.build

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8aff723..f8a4c57 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -825,7 +825,6 @@ fedora-release:
       -Wno-error=uninitialized
     CPP_ARGS: >
       -Wno-error=array-bounds
-    DRI_DRIVERS: "i965"
     DRI_LOADERS: >
       -D glx=dri
       -D gbm=enabled
@@ -1039,7 +1038,6 @@ debian-clang:
       -Wno-error=unused-variable
     DRI_LOADERS: >
      -D glvnd=true
-    DRI_DRIVERS: "auto"
    GALLIUM_DRIVERS: "iris,nouveau,kmsro,r300,r600,freedreno,swr,swrast,svga,v3d,vc4,virgl,etnaviv,panfrost,lima,zink,radeonsi,tegra,d3d12,crocus"
    VULKAN_DRIVERS: intel,amd,freedreno,broadcom,virtio-experimental
    CC: clang
@@ -1118,7 +1116,6 @@ debian-i386:
    CROSS: i386
    VULKAN_DRIVERS: intel,amd,swrast,virtio-experimental
    GALLIUM_DRIVERS: "iris,nouveau,r300,r600,radeonsi,swrast,virgl,zink,crocus"
-    DRI_DRIVERS: "i965"
    EXTRA_OPTION: >
      -D vulkan-layers=device-select,overlay
@@ -1155,7 +1152,6 @@ debian-mingw32-x86_64:
      -Wno-error=format
      -Wno-error=format-extra-args
    CPP_ARGS: $C_ARGS
-    DRI_DRIVERS: ""
    GALLIUM_DRIVERS: "swrast"
    EXTRA_OPTION: >
      -Dllvm=disabled
diff --git a/.gitlab-ci/meson/build.sh b/.gitlab-ci/meson/build.sh
index 56391e5..d052397 100755
--- a/.gitlab-ci/meson/build.sh
+++ b/.gitlab-ci/meson/build.sh
@@ -68,7 +68,6 @@ meson _build --native-file=native.file \
       -D cpp_args="$(echo -n $CPP_ARGS)" \
       -D libunwind=${UNWIND} \
       ${DRI_LOADERS} \
-      -D dri-drivers=${DRI_DRIVERS:-[]} \
       ${GALLIUM_ST} \
       -D gallium-drivers=${GALLIUM_DRIVERS:-[]} \
       -D vulkan-drivers=${VULKAN_DRIVERS:-[]} \
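The header deleted below is not ordinary C: it is an X-macro table, one CHIPSET(pci_id, family, family_string, marketing_name) row per supported device, with no include guard, meant to be expanded under whatever definition of CHIPSET the includer provides. A minimal sketch of the pattern, under the assumption of a made-up consumer (the struct and single inlined row here are illustrative, not Mesa code):

#include <stdint.h>

struct chip_name { uint32_t pci_id; const char *name; };

/* Each consumer defines CHIPSET to pick the columns it cares about;
 * unused arguments (family, fam_str) simply drop out of the expansion. */
#define CHIPSET(id, family, fam_str, name) { id, name },
static const struct chip_name chip_names[] = {
   /* In Mesa this row would come from #include "pci_ids/i965_pci_ids.h". */
   CHIPSET(0x29A2, i965, "BW", "Intel(R) 965G")
};
#undef CHIPSET

The same table feeds the loader's ID lists and the device-info switch further down in this patch, which is why removing the driver means deleting one header and retargeting a handful of #include lines.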
Graphics") -CHIPSET(0x0C22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0A22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400") -CHIPSET(0x0A26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics 5000") -CHIPSET(0x0A0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0A2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0A2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4200") -CHIPSET(0x0A2E, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Graphics 5100") -CHIPSET(0x0D02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600") -CHIPSET(0x0D22, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics 5200") -CHIPSET(0x0D06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D26, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics P5200") -CHIPSET(0x0D0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0D0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0D0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") - -CHIPSET(0x0F31, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0F32, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0F33, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0157, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0155, byt, "BYT", "Intel(R) HD Graphics") - -CHIPSET(0x22B0, chv, "CHV", "Intel(R) HD Graphics") -CHIPSET(0x22B1, chv, "BSW", "Intel(R) HD Graphics XXX") /* Overridden in brw_get_renderer_string */ -CHIPSET(0x22B2, chv, "CHV", "Intel(R) HD Graphics") -CHIPSET(0x22B3, chv, "CHV", "Intel(R) HD Graphics") -#endif - -#ifndef PREFER_IRIS -CHIPSET(0x1602, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x1606, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x160A, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x160B, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x160D, bdw_gt1, "BDW GT1", "Intel(R) HD 
Graphics") -CHIPSET(0x160E, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x1612, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5600") -CHIPSET(0x1616, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5500") -CHIPSET(0x161A, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics P5700") -CHIPSET(0x161B, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics") -CHIPSET(0x161D, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics") -CHIPSET(0x161E, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5300") -CHIPSET(0x1622, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics 6200") -CHIPSET(0x1626, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics 6000") -CHIPSET(0x162A, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics P6300") -CHIPSET(0x162B, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Graphics 6100") -CHIPSET(0x162D, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics") -CHIPSET(0x162E, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics") - -CHIPSET(0x1902, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510") -CHIPSET(0x1906, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510") -CHIPSET(0x190A, skl_gt1, "SKL GT1", "Intel(R) HD Graphics") -CHIPSET(0x190B, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510") -CHIPSET(0x190E, skl_gt1, "SKL GT1", "Intel(R) HD Graphics") -CHIPSET(0x1912, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530") -CHIPSET(0x1913, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics") -CHIPSET(0x1915, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics") -CHIPSET(0x1916, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520") -CHIPSET(0x1917, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics") -CHIPSET(0x191A, skl_gt2, "SKL GT2", "Intel(R) HD Graphics") -CHIPSET(0x191B, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530") -CHIPSET(0x191D, skl_gt2, "SKL GT2", "Intel(R) HD Graphics P530") -CHIPSET(0x191E, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 515") -CHIPSET(0x1921, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520") -CHIPSET(0x1923, skl_gt3, "SKL GT3", "Intel(R) HD Graphics 535") -CHIPSET(0x1926, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 540") -CHIPSET(0x1927, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 550") -CHIPSET(0x192A, skl_gt4, "SKL GT4", "Intel(R) HD Graphics") -CHIPSET(0x192B, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 555") -CHIPSET(0x192D, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics P555") -CHIPSET(0x1932, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580") -CHIPSET(0x193A, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580") -CHIPSET(0x193B, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580") -CHIPSET(0x193D, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580") - -CHIPSET(0x0A84, bxt, "BXT 3", "Intel(R) HD Graphics") -CHIPSET(0x1A84, bxt, "BXT 3", "Intel(R) HD Graphics") -CHIPSET(0x1A85, bxt_2x6, "BXT 2", "Intel(R) HD Graphics") -CHIPSET(0x5A84, bxt, "APL 3", "Intel(R) HD Graphics 505") -CHIPSET(0x5A85, bxt_2x6, "APL 2", "Intel(R) HD Graphics 500") - -CHIPSET(0x3184, glk, "GLK 3", "Intel(R) UHD Graphics 605") -CHIPSET(0x3185, glk_2x6, "GLK 2", "Intel(R) UHD Graphics 600") - -CHIPSET(0x5902, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610") -CHIPSET(0x5906, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610") -CHIPSET(0x590A, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics") -CHIPSET(0x5908, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics") -CHIPSET(0x590B, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610") -CHIPSET(0x590E, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics") -CHIPSET(0x5913, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics") -CHIPSET(0x5915, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics") -CHIPSET(0x5917, kbl_gt2, "KBL GT2", "Intel(R) UHD Graphics 620") -CHIPSET(0x5912, kbl_gt2, 
"KBL GT2", "Intel(R) HD Graphics 630") -CHIPSET(0x5916, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 620") -CHIPSET(0x591A, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630") -CHIPSET(0x591B, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630") -CHIPSET(0x591D, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630") -CHIPSET(0x591E, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 615") -CHIPSET(0x5921, kbl_gt2, "KBL GT2F", "Intel(R) HD Graphics 620") -CHIPSET(0x5923, kbl_gt3, "KBL GT3", "Intel(R) HD Graphics 635") -CHIPSET(0x5926, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 640 (Kaby Lake GT3e)") -CHIPSET(0x5927, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 650 (Kaby Lake GT3e)") -CHIPSET(0x593B, kbl_gt4, "KBL GT4", "Intel(R) HD Graphics") - -CHIPSET(0x591C, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 615") -CHIPSET(0x87C0, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 617") - -CHIPSET(0x87CA, cfl_gt2, "AML-CFL", "Intel(R) UHD Graphics") - -CHIPSET(0x3E90, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E93, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E99, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E9C, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E91, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E92, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E96, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x3E98, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E9A, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x3E9B, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E94, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x3EA9, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 620") -CHIPSET(0x3EA5, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655") -CHIPSET(0x3EA6, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 645") -CHIPSET(0x3EA7, cfl_gt3, "CFL GT3", "Intel(R) HD Graphics") -CHIPSET(0x3EA8, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655") - -CHIPSET(0x3EA1, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3EA4, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics") -CHIPSET(0x3EA0, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics 620") -CHIPSET(0x3EA3, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics") -CHIPSET(0x3EA2, cfl_gt3, "WHL GT3", "Intel(R) UHD Graphics") - -CHIPSET(0x9B21, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA0, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA2, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA4, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA5, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x9BA8, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x9BAA, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BAB, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BAC, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9B41, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC0, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC2, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC4, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC5, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x9BC6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x9BC8, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x9BCA, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BCB, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BCC, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") 
-CHIPSET(0x9BE6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x9BF6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-
-CHIPSET(0x8A50, icl_gt2, "ICL GT2", "Intel(R) HD Graphics")
-CHIPSET(0x8A51, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A52, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A53, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A54, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A56, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A57, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A58, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A59, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A5A, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5B, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A5C, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5D, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A71, icl_gt0_5, "ICL GT0.5", "Intel(R) HD Graphics")
-
-CHIPSET(0x4500, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4541, ehl_2x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4551, ehl_4x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4555, ehl_2x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4557, ehl_4x5, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4571, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E51, ehl_4x4, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E55, ehl_2x8, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E57, ehl_4x5, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E61, ehl_4x6, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E71, ehl_4x8, "JSL", "Intel(R) UHD Graphics")
-#endif

diff --git a/meson.build b/meson.build
index 03830e4..8b8ffc9 100644
--- a/meson.build
+++ b/meson.build
@@ -174,28 +174,10 @@ with_shared_glapi = with_shared_glapi and with_any_opengl
 system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux', 'sunos'].contains(host_machine.system())
 
 dri_drivers = get_option('dri-drivers')
-if dri_drivers.contains('auto')
-  if system_has_kms_drm
-    # TODO: PPC, Sparc
-    if ['x86', 'x86_64'].contains(host_machine.cpu_family())
-      dri_drivers = ['i965']
-    elif ['arm', 'aarch64', 'mips', 'mips64'].contains(host_machine.cpu_family())
-      dri_drivers = []
-    else
-      error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
-        host_machine.cpu_family()))
-    endif
-  elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
-    # only swrast would make sense here, but gallium swrast is a much better default
-    dri_drivers = []
-  else
-    error('Unknown OS @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
-      host_machine.system()))
-  endif
+if dri_drivers.length() != 0
+  error('Mesa\'s main branch no longer has any "classic" drivers, use the "amber" branch instead.')
 endif
-with_dri_i965 = dri_drivers.contains('i965')
-
 with_dri = dri_drivers.length() != 0
 
 gallium_drivers = get_option('gallium-drivers')
@@ -205,7 +187,7 @@ if gallium_drivers.contains('auto')
     if ['x86', 'x86_64'].contains(host_machine.cpu_family())
       gallium_drivers = [
         'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'svga', 'swrast',
-        'iris', 'crocus'
+        'iris', 'crocus', 'i915'
       ]
     elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
       gallium_drivers = [
@@ -293,7 +275,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
 with_any_vk = _vulkan_drivers.length() != 0
 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
+with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus
 
 if with_swrast_vk and not with_gallium_softpipe
   error('swrast vulkan requires gallium swrast')
@@ -1493,8 +1475,6 @@ if cc.has_function('dl_iterate_phdr')
   pre_args += '-DHAVE_DL_ITERATE_PHDR'
 elif with_intel_vk
   error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
-elif with_dri_i965 and with_shader_cache
-  error('Intel i965 GL driver requires dl_iterate_phdr when built with shader caching.')
 endif
 
 # Determine whether or not the rt library is needed for time functions
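The meson.build hunks above are the heart of the change: instead of expanding dri-drivers='auto' into ['i965'] on x86, configuration now stops with the quoted error for any non-empty dri-drivers list, pointing users at the amber branch where the classic drivers are maintained. The Gallium auto list gains 'i915' next to 'iris' and 'crocus', so pre-Gfx4 hardware keeps a default driver once the classic tree is gone, and with_any_intel no longer has a classic GL term.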
diff --git a/meson_options.txt b/meson_options.txt
index ac49808..5278e17 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -54,9 +54,7 @@ option(
 option(
   'dri-drivers',
   type : 'array',
-  value : ['auto'],
-  choices : ['auto', 'i965'],
-  description : 'List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
+  description : 'DEPRECATED: List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(
   'dri-drivers-path',
@@ -455,18 +453,6 @@ option(
   value : true,
   description : 'Enable direct rendering in GLX and EGL for DRI',
 )
-option(
-  'prefer-iris',
-  type : 'boolean',
-  value : true,
-  description : 'Prefer new Intel iris driver over older i965 driver'
-)
-option(
-  'prefer-crocus',
-  type : 'boolean',
-  value : false,
-  description : 'Prefer new crocus driver over older i965 driver for gen4-7'
-)
 option(
   'egl-lib-suffix',
   type : 'string',
   value : '',
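intel_device_info picks a per-device info struct by expanding the same PCI-ID tables into switch cases, which is why the hunks below only need to retarget one #include from i965_pci_ids.h to crocus_pci_ids.h. Roughly how that expansion works, sketched with a single hard-coded row and a stand-in struct rather than the real intel_device_info definitions:

#include <stdbool.h>

struct device_info_sketch { const char *name; };

/* Stand-in for the per-family constant the real code selects. */
static const struct device_info_sketch intel_device_info_hsw_gt2 =
   { "Haswell GT2" };

static bool
get_device_info(int pci_id, struct device_info_sketch *devinfo)
{
   switch (pci_id) {
#define CHIPSET(id, family, fam_str, name) \
   case id: *devinfo = intel_device_info_##family; break;
   /* Mesa expands crocus_pci_ids.h and iris_pci_ids.h here; one row inline: */
   CHIPSET(0x0412, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
#undef CHIPSET
   default:
      return false;
   }
   return true;
}

Because the crocus table carries the same Gfx4-7.5 rows the i965 table did, device-info lookup keeps working for that hardware with no behavioral change.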
diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c
index fc2d0d0..4fb99fe 100644
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1247,7 +1247,7 @@ intel_get_device_info_from_pci_id(int pci_id,
 #undef CHIPSET
 #define CHIPSET(id, family, fam_str, name) \
    case id: *devinfo = intel_device_info_##family; break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
 #undef CHIPSET
 
@@ -1269,7 +1269,7 @@ intel_get_device_info_from_pci_id(int pci_id,
               sizeof(devinfo->name)); \
       strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
       break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
    default:
       strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));

diff --git a/src/intel/dev/intel_device_info_test.c b/src/intel/dev/intel_device_info_test.c
index 236310d..c65f7a7 100644
--- a/src/intel/dev/intel_device_info_test.c
+++ b/src/intel/dev/intel_device_info_test.c
@@ -14,13 +14,8 @@ main(int argc, char *argv[])
    } chipsets[] = {
 #undef CHIPSET
 #define CHIPSET(id, family, family_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/crocus_pci_ids.h"
-#include "pci_ids/i965_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
-#undef CHIPSET
-#define CHIPSET(id, fam_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/i915_pci_ids.h"
-#undef CHIPSET
+#include "pci_ids/crocus_pci_ids.h"
    };
 
    for (uint32_t i = 0; i < ARRAY_SIZE(chipsets); i++) {

diff --git a/src/loader/meson.build b/src/loader/meson.build
index 0a529d8..6334cb9 100644
--- a/src/loader/meson.build
+++ b/src/loader/meson.build
@@ -40,15 +40,6 @@ loader_c_args = [
   '-DUSE_DRICONF',
   '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
 ]
-
-if get_option('prefer-iris')
-  loader_c_args += ['-DPREFER_IRIS']
-endif
-
-if get_option('prefer-crocus')
-  loader_c_args += ['-DPREFER_CROCUS']
-endif
-
 libloader = static_library(
   'loader',
   ['loader_dri_helper.c', 'loader.c'],

diff --git a/src/loader/pci_id_driver_map.h b/src/loader/pci_id_driver_map.h
index 544f192..5ffcf05 100644
--- a/src/loader/pci_id_driver_map.h
+++ b/src/loader/pci_id_driver_map.h
@@ -8,12 +8,6 @@
 # error "Only include from loader.c"
 #endif
 
-static const int i965_chip_ids[] = {
-#define CHIPSET(chip, family, family_str, name) chip,
-#include "pci_ids/i965_pci_ids.h"
-#undef CHIPSET
-};
-
 static const int crocus_chip_ids[] = {
 #define CHIPSET(chip, family, family_str, name) chip,
 #include "pci_ids/crocus_pci_ids.h"
@@ -53,7 +47,6 @@ static const struct {
    int num_chips_ids;
    bool (*predicate)(int fd);
 } driver_map[] = {
-   { 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
    { 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
    { 0x8086, "iris", NULL, -1, is_kernel_i915 },
    { 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },
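With the i965 row gone from driver_map above, the loader resolves Gfx4-7.5 PCI IDs through crocus_chip_ids, while the iris row carries no ID list (num_chips_ids == -1) and instead claims any remaining Intel device whose kernel driver passes the is_kernel_i915 check. A simplified sketch of the lookup this table feeds, with stand-in data; the real loop lives in loader.c:

#include <stdbool.h>
#include <stddef.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Stand-in for the real kernel-driver probe. */
static bool is_kernel_i915(int fd) { (void)fd; return true; }

static const int crocus_chip_ids[] = { 0x0412 /* ...from crocus_pci_ids.h */ };

static const struct {
   int vendor_id;
   const char *driver;
   const int *chip_ids;
   int num_chips_ids;
   bool (*predicate)(int fd);
} driver_map[] = {
   { 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
   { 0x8086, "iris", NULL, -1, is_kernel_i915 },
};

static const char *
find_driver(int vendor_id, int chip_id, int fd)
{
   for (size_t i = 0; i < ARRAY_SIZE(driver_map); i++) {
      if (driver_map[i].vendor_id != vendor_id)
         continue;
      if (driver_map[i].predicate && !driver_map[i].predicate(fd))
         continue;
      if (driver_map[i].num_chips_ids == -1)
         return driver_map[i].driver; /* catch-all row: iris takes the rest */
      for (int j = 0; j < driver_map[i].num_chips_ids; j++)
         if (driver_map[i].chip_ids[j] == chip_id)
            return driver_map[i].driver;
   }
   return NULL;
}

Ordering matters: crocus is listed before the iris catch-all, so older parts match their explicit ID list first and never fall through to iris.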
diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
deleted file mode 100644
index 9955018..0000000
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ /dev/null
@@ -1,1332 +0,0 @@
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_bufmgr.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "common/intel_decoder.h"
-#include "common/intel_gem.h"
-
-#include "util/hash_table.h"
-
-#include <xf86drm.h>
-#include "drm-uapi/i915_drm.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BUFMGR
-
-/**
- * Target sizes of the batch and state buffers. We create the initial
- * buffers at these sizes, and flush when they're nearly full. If we
- * underestimate how close we are to the end, and suddenly need more space
- * in the middle of a draw, we can grow the buffers, and finish the draw.
- * At that point, we'll be over our target size, so the next operation
- * should flush. Each time we flush the batch, we recreate both buffers
- * at the original target size, so it doesn't grow without bound.
- */
-#define BATCH_SZ (20 * 1024)
-#define STATE_SZ (16 * 1024)
-
-static void
-brw_batch_reset(struct brw_context *brw);
-static void
-brw_new_batch(struct brw_context *brw);
-
-static unsigned
-num_fences(struct brw_batch *batch)
-{
-   return util_dynarray_num_elements(&batch->exec_fences,
-                                     struct drm_i915_gem_exec_fence);
-}
-
-
-static void
-dump_validation_list(struct brw_batch *batch)
-{
-   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      uint64_t flags = batch->validation_list[i].flags;
-      assert(batch->validation_list[i].handle ==
-             batch->exec_bos[i]->gem_handle);
-      fprintf(stderr, "[%2d]: %2d %-14s %p %s%-7s @ 0x%"PRIx64"%s (%"PRIu64"B)\n",
-              i,
-              batch->validation_list[i].handle,
-              batch->exec_bos[i]->name,
-              batch->exec_bos[i],
-              (flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) ? "(48b" : "(32b",
-              (flags & EXEC_OBJECT_WRITE) ? " write)" : ")",
-              (uint64_t)batch->validation_list[i].offset,
-              (flags & EXEC_OBJECT_PINNED) ?
" (pinned)" : "", - batch->exec_bos[i]->size); - } -} - -static struct intel_batch_decode_bo -decode_get_bo(void *v_brw, bool ppgtt, uint64_t address) -{ - struct brw_context *brw = v_brw; - struct brw_batch *batch = &brw->batch; - - for (int i = 0; i < batch->exec_count; i++) { - struct brw_bo *bo = batch->exec_bos[i]; - /* The decoder zeroes out the top 16 bits, so we need to as well */ - uint64_t bo_address = bo->gtt_offset & (~0ull >> 16); - - if (address >= bo_address && address < bo_address + bo->size) { - return (struct intel_batch_decode_bo) { - .addr = bo_address, - .size = bo->size, - .map = brw_bo_map(brw, bo, MAP_READ), - }; - } - } - - return (struct intel_batch_decode_bo) { }; -} - -static unsigned -decode_get_state_size(void *v_brw, uint64_t address, uint64_t base_address) -{ - struct brw_context *brw = v_brw; - struct brw_batch *batch = &brw->batch; - unsigned size = (uintptr_t) - _mesa_hash_table_u64_search(batch->state_batch_sizes, - address - base_address); - return size; -} - -static void -init_reloc_list(struct brw_reloc_list *rlist, int count) -{ - rlist->reloc_count = 0; - rlist->reloc_array_size = count; - rlist->relocs = malloc(rlist->reloc_array_size * - sizeof(struct drm_i915_gem_relocation_entry)); -} - -void -brw_batch_init(struct brw_context *brw) -{ - struct brw_screen *screen = brw->screen; - struct brw_batch *batch = &brw->batch; - const struct intel_device_info *devinfo = &screen->devinfo; - - if (INTEL_DEBUG(DEBUG_BATCH)) { - /* The shadow doesn't get relocs written so state decode fails. */ - batch->use_shadow_copy = false; - } else - batch->use_shadow_copy = !devinfo->has_llc; - - init_reloc_list(&batch->batch_relocs, 250); - init_reloc_list(&batch->state_relocs, 250); - - batch->batch.map = NULL; - batch->state.map = NULL; - batch->exec_count = 0; - batch->exec_array_size = 100; - batch->exec_bos = - malloc(batch->exec_array_size * sizeof(batch->exec_bos[0])); - batch->validation_list = - malloc(batch->exec_array_size * sizeof(batch->validation_list[0])); - batch->contains_fence_signal = false; - - if (INTEL_DEBUG(DEBUG_BATCH)) { - batch->state_batch_sizes = - _mesa_hash_table_u64_create(NULL); - - const unsigned decode_flags = - INTEL_BATCH_DECODE_FULL | - (INTEL_DEBUG(DEBUG_COLOR) ? 
INTEL_BATCH_DECODE_IN_COLOR : 0) | - INTEL_BATCH_DECODE_OFFSETS | - INTEL_BATCH_DECODE_FLOATS; - - intel_batch_decode_ctx_init(&batch->decoder, devinfo, stderr, - decode_flags, NULL, decode_get_bo, - decode_get_state_size, brw); - batch->decoder.max_vbo_decoded_lines = 100; - } - - batch->use_batch_first = - screen->kernel_features & KERNEL_ALLOWS_EXEC_BATCH_FIRST; - - /* PIPE_CONTROL needs a w/a but only on gfx6 */ - batch->valid_reloc_flags = EXEC_OBJECT_WRITE; - if (devinfo->ver == 6) - batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT; - - brw_batch_reset(brw); -} - -#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) - -static unsigned -add_exec_bo(struct brw_batch *batch, struct brw_bo *bo) -{ - assert(bo->bufmgr == batch->batch.bo->bufmgr); - - unsigned index = READ_ONCE(bo->index); - - if (index < batch->exec_count && batch->exec_bos[index] == bo) - return index; - - /* May have been shared between multiple active batches */ - for (index = 0; index < batch->exec_count; index++) { - if (batch->exec_bos[index] == bo) - return index; - } - - brw_bo_reference(bo); - - if (batch->exec_count == batch->exec_array_size) { - batch->exec_array_size *= 2; - batch->exec_bos = - realloc(batch->exec_bos, - batch->exec_array_size * sizeof(batch->exec_bos[0])); - batch->validation_list = - realloc(batch->validation_list, - batch->exec_array_size * sizeof(batch->validation_list[0])); - } - - batch->validation_list[batch->exec_count] = - (struct drm_i915_gem_exec_object2) { - .handle = bo->gem_handle, - .offset = bo->gtt_offset, - .flags = bo->kflags, - }; - - bo->index = batch->exec_count; - batch->exec_bos[batch->exec_count] = bo; - batch->aperture_space += bo->size; - - return batch->exec_count++; -} - -static void -recreate_growing_buffer(struct brw_context *brw, - struct brw_growing_bo *grow, - const char *name, unsigned size, - enum brw_memory_zone memzone) -{ - struct brw_screen *screen = brw->screen; - struct brw_batch *batch = &brw->batch; - struct brw_bufmgr *bufmgr = screen->bufmgr; - - /* We can't grow buffers when using softpin, so just overallocate them. */ - if (brw_using_softpin(bufmgr)) - size *= 2; - - grow->bo = brw_bo_alloc(bufmgr, name, size, memzone); - grow->bo->kflags |= can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0; - grow->partial_bo = NULL; - grow->partial_bo_map = NULL; - grow->partial_bytes = 0; - grow->memzone = memzone; - - if (batch->use_shadow_copy) - grow->map = realloc(grow->map, grow->bo->size); - else - grow->map = brw_bo_map(brw, grow->bo, MAP_READ | MAP_WRITE); -} - -static void -brw_batch_reset(struct brw_context *brw) -{ - struct brw_batch *batch = &brw->batch; - - if (batch->last_bo != NULL) { - brw_bo_unreference(batch->last_bo); - batch->last_bo = NULL; - } - batch->last_bo = batch->batch.bo; - - recreate_growing_buffer(brw, &batch->batch, "batchbuffer", BATCH_SZ, - BRW_MEMZONE_OTHER); - batch->map_next = batch->batch.map; - - recreate_growing_buffer(brw, &batch->state, "statebuffer", STATE_SZ, - BRW_MEMZONE_DYNAMIC); - - /* Avoid making 0 a valid state offset - otherwise the decoder will try - * and decode data when we use offset 0 as a null pointer. - */ - batch->state_used = 1; - - add_exec_bo(batch, batch->batch.bo); - assert(batch->batch.bo->index == 0); - - batch->needs_sol_reset = false; - batch->state_base_address_emitted = false; - - if (batch->state_batch_sizes) - _mesa_hash_table_u64_clear(batch->state_batch_sizes); - - /* Always add workaround_bo which contains a driver identifier to be - * recorded in error states. 
- */ - struct brw_bo *identifier_bo = brw->workaround_bo; - if (identifier_bo) - add_exec_bo(batch, identifier_bo); - - if (batch->contains_fence_signal) - batch->contains_fence_signal = false; -} - -static void -brw_batch_reset_and_clear_render_cache(struct brw_context *brw) -{ - brw_batch_reset(brw); - brw_cache_sets_clear(brw); -} - -void -brw_batch_save_state(struct brw_context *brw) -{ - brw->batch.saved.map_next = brw->batch.map_next; - brw->batch.saved.batch_reloc_count = brw->batch.batch_relocs.reloc_count; - brw->batch.saved.state_reloc_count = brw->batch.state_relocs.reloc_count; - brw->batch.saved.exec_count = brw->batch.exec_count; -} - -bool -brw_batch_saved_state_is_empty(struct brw_context *brw) -{ - struct brw_batch *batch = &brw->batch; - return (batch->saved.map_next == batch->batch.map); -} - -void -brw_batch_reset_to_saved(struct brw_context *brw) -{ - for (int i = brw->batch.saved.exec_count; - i < brw->batch.exec_count; i++) { - brw_bo_unreference(brw->batch.exec_bos[i]); - } - brw->batch.batch_relocs.reloc_count = brw->batch.saved.batch_reloc_count; - brw->batch.state_relocs.reloc_count = brw->batch.saved.state_reloc_count; - brw->batch.exec_count = brw->batch.saved.exec_count; - - brw->batch.map_next = brw->batch.saved.map_next; - if (USED_BATCH(brw->batch) == 0) - brw_new_batch(brw); -} - -void -brw_batch_free(struct brw_batch *batch) -{ - if (batch->use_shadow_copy) { - free(batch->batch.map); - free(batch->state.map); - } - - for (int i = 0; i < batch->exec_count; i++) { - brw_bo_unreference(batch->exec_bos[i]); - } - free(batch->batch_relocs.relocs); - free(batch->state_relocs.relocs); - free(batch->exec_bos); - free(batch->validation_list); - - brw_bo_unreference(batch->last_bo); - brw_bo_unreference(batch->batch.bo); - brw_bo_unreference(batch->state.bo); - if (batch->state_batch_sizes) { - _mesa_hash_table_u64_destroy(batch->state_batch_sizes); - intel_batch_decode_ctx_finish(&batch->decoder); - } -} - -/** - * Finish copying the old batch/state buffer's contents to the new one - * after we tried to "grow" the buffer in an earlier operation. - */ -static void -finish_growing_bos(struct brw_growing_bo *grow) -{ - struct brw_bo *old_bo = grow->partial_bo; - if (!old_bo) - return; - - memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes); - - grow->partial_bo = NULL; - grow->partial_bo_map = NULL; - grow->partial_bytes = 0; - - brw_bo_unreference(old_bo); -} - -static void -replace_bo_in_reloc_list(struct brw_reloc_list *rlist, - uint32_t old_handle, uint32_t new_handle) -{ - for (int i = 0; i < rlist->reloc_count; i++) { - if (rlist->relocs[i].target_handle == old_handle) - rlist->relocs[i].target_handle = new_handle; - } -} - -/** - * Grow either the batch or state buffer to a new larger size. - * - * We can't actually grow buffers, so we allocate a new one, copy over - * the existing contents, and update our lists to refer to the new one. - * - * Note that this is only temporary - each new batch recreates the buffers - * at their original target size (BATCH_SZ or STATE_SZ). - */ -static void -grow_buffer(struct brw_context *brw, - struct brw_growing_bo *grow, - unsigned existing_bytes, - unsigned new_size) -{ - struct brw_batch *batch = &brw->batch; - struct brw_bufmgr *bufmgr = brw->bufmgr; - struct brw_bo *bo = grow->bo; - - /* We can't grow buffers that are softpinned, as the growing mechanism - * involves putting a larger buffer at the same gtt_offset...and we've - * only allocated the smaller amount of VMA. 
Without relocations, this - * simply won't work. This should never happen, however. - */ - assert(!(bo->kflags & EXEC_OBJECT_PINNED)); - - perf_debug("Growing %s - ran out of space\n", bo->name); - - if (grow->partial_bo) { - /* We've already grown once, and now we need to do it again. - * Finish our last grow operation so we can start a new one. - * This should basically never happen. - */ - perf_debug("Had to grow multiple times"); - finish_growing_bos(grow); - } - - struct brw_bo *new_bo = - brw_bo_alloc(bufmgr, bo->name, new_size, grow->memzone); - - /* Copy existing data to the new larger buffer */ - grow->partial_bo_map = grow->map; - - if (batch->use_shadow_copy) { - /* We can't safely use realloc, as it may move the existing buffer, - * breaking existing pointers the caller may still be using. Just - * malloc a new copy and memcpy it like the normal BO path. - * - * Use bo->size rather than new_size because the bufmgr may have - * rounded up the size, and we want the shadow size to match. - */ - grow->map = malloc(new_bo->size); - } else { - grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); - } - - /* Try to put the new BO at the same GTT offset as the old BO (which - * we're throwing away, so it doesn't need to be there). - * - * This guarantees that our relocations continue to work: values we've - * already written into the buffer, values we're going to write into the - * buffer, and the validation/relocation lists all will match. - * - * Also preserve kflags for EXEC_OBJECT_CAPTURE. - */ - new_bo->gtt_offset = bo->gtt_offset; - new_bo->index = bo->index; - new_bo->kflags = bo->kflags; - - /* Batch/state buffers are per-context, and if we've run out of space, - * we must have actually used them before, so...they will be in the list. - */ - assert(bo->index < batch->exec_count); - assert(batch->exec_bos[bo->index] == bo); - - /* Update the validation list to use the new BO. */ - batch->validation_list[bo->index].handle = new_bo->gem_handle; - - if (!batch->use_batch_first) { - /* We're not using I915_EXEC_HANDLE_LUT, which means we need to go - * update the relocation list entries to point at the new BO as well. - * (With newer kernels, the "handle" is an offset into the validation - * list, which remains unchanged, so we can skip this.) - */ - replace_bo_in_reloc_list(&batch->batch_relocs, - bo->gem_handle, new_bo->gem_handle); - replace_bo_in_reloc_list(&batch->state_relocs, - bo->gem_handle, new_bo->gem_handle); - } - - /* Exchange the two BOs...without breaking pointers to the old BO. - * - * Consider this scenario: - * - * 1. Somebody calls brw_state_batch() to get a region of memory, and - * and then creates a brw_address pointing to brw->batch.state.bo. - * 2. They then call brw_state_batch() a second time, which happens to - * grow and replace the state buffer. They then try to emit a - * relocation to their first section of memory. - * - * If we replace the brw->batch.state.bo pointer at step 2, we would - * break the address created in step 1. They'd have a pointer to the - * old destroyed BO. Emitting a relocation would add this dead BO to - * the validation list...causing /both/ statebuffers to be in the list, - * and all kinds of disasters. - * - * This is not a contrived case - BLORP vertex data upload hits this. - * - * There are worse scenarios too. Fences for GL sync objects reference - * brw->batch.batch.bo. If we replaced the batch pointer when growing, - * we'd need to chase down every fence and update it to point to the - * new BO. 
Otherwise, it would refer to a "batch" that never actually - * gets submitted, and would fail to trigger. - * - * To work around both of these issues, we transmutate the buffers in - * place, making the existing struct brw_bo represent the new buffer, - * and "new_bo" represent the old BO. This is highly unusual, but it - * seems like a necessary evil. - * - * We also defer the memcpy of the existing batch's contents. Callers - * may make multiple brw_state_batch calls, and retain pointers to the - * old BO's map. We'll perform the memcpy in finish_growing_bo() when - * we finally submit the batch, at which point we've finished uploading - * state, and nobody should have any old references anymore. - * - * To do that, we keep a reference to the old BO in grow->partial_bo, - * and store the number of bytes to copy in grow->partial_bytes. We - * can monkey with the refcounts directly without atomics because these - * are per-context BOs and they can only be touched by this thread. - */ - assert(new_bo->refcount == 1); - new_bo->refcount = bo->refcount; - bo->refcount = 1; - - assert(list_is_empty(&bo->exports)); - assert(list_is_empty(&new_bo->exports)); - - struct brw_bo tmp; - memcpy(&tmp, bo, sizeof(struct brw_bo)); - memcpy(bo, new_bo, sizeof(struct brw_bo)); - memcpy(new_bo, &tmp, sizeof(struct brw_bo)); - - list_inithead(&bo->exports); - list_inithead(&new_bo->exports); - - grow->partial_bo = new_bo; /* the one reference of the OLD bo */ - grow->partial_bytes = existing_bytes; -} - -void -brw_batch_require_space(struct brw_context *brw, GLuint sz) -{ - struct brw_batch *batch = &brw->batch; - - const unsigned batch_used = USED_BATCH(*batch) * 4; - if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) { - brw_batch_flush(brw); - } else if (batch_used + sz >= batch->batch.bo->size) { - const unsigned new_size = - MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, - MAX_BATCH_SIZE); - grow_buffer(brw, &batch->batch, batch_used, new_size); - batch->map_next = (void *) batch->batch.map + batch_used; - assert(batch_used + sz < batch->batch.bo->size); - } -} - -/** - * Called when starting a new batch buffer. - */ -static void -brw_new_batch(struct brw_context *brw) -{ - /* Unreference any BOs held by the previous batch, and reset counts. */ - for (int i = 0; i < brw->batch.exec_count; i++) { - brw_bo_unreference(brw->batch.exec_bos[i]); - brw->batch.exec_bos[i] = NULL; - } - brw->batch.batch_relocs.reloc_count = 0; - brw->batch.state_relocs.reloc_count = 0; - brw->batch.exec_count = 0; - brw->batch.aperture_space = 0; - - brw_bo_unreference(brw->batch.state.bo); - - /* Create a new batchbuffer and reset the associated state: */ - brw_batch_reset_and_clear_render_cache(brw); - - /* If the kernel supports hardware contexts, then most hardware state is - * preserved between batches; we only need to re-emit state that is required - * to be in every batch. Otherwise we need to re-emit all the state that - * would otherwise be stored in the context (which for all intents and - * purposes means everything). - */ - if (brw->hw_ctx == 0) { - brw->ctx.NewDriverState |= BRW_NEW_CONTEXT; - brw_upload_invariant_state(brw); - } - - brw->ctx.NewDriverState |= BRW_NEW_BATCH; - - brw->ib.index_size = -1; - - /* We need to periodically reap the shader time results, because rollover - * happens every few seconds. We also want to see results every once in a - * while, because many programs won't cleanly destroy our context, so the - * end-of-run printout may not happen. 
- */ - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) - brw_collect_and_report_shader_time(brw); - - brw_batch_maybe_noop(brw); -} - -/** - * Called from brw_batch_flush before emitting MI_BATCHBUFFER_END and - * sending it off. - * - * This function can emit state (say, to preserve registers that aren't saved - * between batches). - */ -static void -brw_finish_batch(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw->batch.no_wrap = true; - - /* Capture the closing pipeline statistics register values necessary to - * support query objects (in the non-hardware context world). - */ - brw_emit_query_end(brw); - - /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which - * assume that the L3 cache is configured according to the hardware - * defaults. On Kernel 4.16+, we no longer need to do this. - */ - if (devinfo->ver >= 7 && - !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION)) - gfx7_restore_default_l3_config(brw); - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* From the Haswell PRM, Volume 2b, Command Reference: Instructions, - * 3DSTATE_CC_STATE_POINTERS > "Note": - * - * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every - * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall." - * - * From the example in the docs, it seems to expect a regular pipe control - * flush here as well. We may have done it already, but meh. - * - * See also WaAvoidRCZCounterRollover. - */ - brw_emit_mi_flush(brw); - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(brw->cc.state_offset | 1); - ADVANCE_BATCH(); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - } - - /* Do not restore push constant packets during context restore. */ - if (devinfo->ver >= 7) - gfx7_emit_isp_disable(brw); - - /* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2 - * requires our batch size to be QWord aligned, so we pad it out if - * necessary by emitting an extra MI_NOOP after the end. - */ - brw_batch_require_space(brw, 8); - *brw->batch.map_next++ = MI_BATCH_BUFFER_END; - if (USED_BATCH(brw->batch) & 1) { - *brw->batch.map_next++ = MI_NOOP; - } - - brw->batch.no_wrap = false; -} - -static void -throttle(struct brw_context *brw) -{ - /* Wait for the swapbuffers before the one we just emitted, so we - * don't get too many swaps outstanding for apps that are GPU-heavy - * but not CPU-heavy. - * - * We're using intelDRI2Flush (called from the loader before - * swapbuffer) and glFlush (for front buffer rendering) as the - * indicator that a frame is done and then throttle when we get - * here as we prepare to render the next frame. At this point for - * round trips for swap/copy and getting new buffers are done and - * we'll spend less time waiting on the GPU. - * - * Unfortunately, we don't have a handle to the batch containing - * the swap, and getting our hands on that doesn't seem worth it, - * so we just use the first batch we emitted after the last swap. 
- */
-   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
-      if (brw->throttle_batch[1]) {
-         if (!brw->disable_throttling) {
-            brw_bo_wait_rendering(brw->throttle_batch[1]);
-         }
-         brw_bo_unreference(brw->throttle_batch[1]);
-      }
-      brw->throttle_batch[1] = brw->throttle_batch[0];
-      brw->throttle_batch[0] = NULL;
-      brw->need_swap_throttle = false;
-      /* Throttling here is more precise than the throttle ioctl, so skip it */
-      brw->need_flush_throttle = false;
-   }
-
-   if (brw->need_flush_throttle) {
-      drmCommandNone(brw->screen->fd, DRM_I915_GEM_THROTTLE);
-      brw->need_flush_throttle = false;
-   }
-}
-
-static int
-execbuffer(int fd,
-           struct brw_batch *batch,
-           uint32_t ctx_id,
-           int used,
-           int in_fence,
-           int *out_fence,
-           int flags)
-{
-   struct drm_i915_gem_execbuffer2 execbuf = {
-      .buffers_ptr = (uintptr_t) batch->validation_list,
-      .buffer_count = batch->exec_count,
-      .batch_start_offset = 0,
-      .batch_len = used,
-      .flags = flags,
-      .rsvd1 = ctx_id, /* rsvd1 is actually the context ID */
-   };
-
-   unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
-
-   if (in_fence != -1) {
-      execbuf.rsvd2 = in_fence;
-      execbuf.flags |= I915_EXEC_FENCE_IN;
-   }
-
-   if (out_fence != NULL) {
-      cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
-      *out_fence = -1;
-      execbuf.flags |= I915_EXEC_FENCE_OUT;
-   }
-
-   if (num_fences(batch)) {
-      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.num_cliprects = num_fences(batch);
-      execbuf.cliprects_ptr =
-         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
-   }
-
-   int ret = drmIoctl(fd, cmd, &execbuf);
-   if (ret != 0)
-      ret = -errno;
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      struct brw_bo *bo = batch->exec_bos[i];
-
-      bo->idle = false;
-      bo->index = -1;
-
-      /* Update brw_bo::gtt_offset */
-      if (batch->validation_list[i].offset != bo->gtt_offset) {
-         DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
-             bo->gem_handle, bo->gtt_offset,
-             (uint64_t)batch->validation_list[i].offset);
-         assert(!(bo->kflags & EXEC_OBJECT_PINNED));
-         bo->gtt_offset = batch->validation_list[i].offset;
-      }
-   }
-
-   if (ret == 0 && out_fence != NULL)
-      *out_fence = execbuf.rsvd2 >> 32;
-
-   return ret;
-}
-
-static int
-submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
-{
-   struct brw_batch *batch = &brw->batch;
-   int ret = 0;
-
-   if (batch->use_shadow_copy) {
-      void *bo_map = brw_bo_map(brw, batch->batch.bo, MAP_WRITE);
-      memcpy(bo_map, batch->batch.map, 4 * USED_BATCH(*batch));
-
-      bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE);
-      memcpy(bo_map, batch->state.map, batch->state_used);
-   }
-
-   brw_bo_unmap(batch->batch.bo);
-   brw_bo_unmap(batch->state.bo);
-
-   if (!brw->screen->devinfo.no_hw) {
-      /* The requirements for using I915_EXEC_NO_RELOC are:
-       *
-       *   The addresses written in the objects must match the corresponding
-       *   reloc.gtt_offset which in turn must match the corresponding
-       *   execobject.offset.
-       *
-       *   Any render targets written to in the batch must be flagged with
-       *   EXEC_OBJECT_WRITE.
-       *
-       *   To avoid stalling, execobject.offset should match the current
-       *   address of that object within the active context.
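The NO_RELOC contract described above amounts to a small piece of uAPI plumbing. A minimal standalone sketch, not part of the deleted file; only the i915 uAPI types, the flags, and drmIoctl() are real, while buffer setup and error handling are elided:

   #include <errno.h>
   #include <stdint.h>
   #include <drm/i915_drm.h>
   #include <xf86drm.h>

   /* Submit a validation list with I915_EXEC_NO_RELOC. This is only legal
    * when every reloc.presumed_offset already equals the matching
    * execobject.offset, and every written render target is flagged with
    * EXEC_OBJECT_WRITE, exactly as the comment above requires. */
   static int
   submit_no_reloc(int fd, uint32_t ctx_id,
                   struct drm_i915_gem_exec_object2 *objs, uint32_t count,
                   uint32_t batch_len)
   {
      struct drm_i915_gem_execbuffer2 execbuf = {
         .buffers_ptr = (uintptr_t) objs,
         .buffer_count = count,
         .batch_len = batch_len,
         .flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
         .rsvd1 = ctx_id, /* context ID */
      };
      return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) ? -errno : 0;
   }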
- */ - int flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - - if (batch->needs_sol_reset) - flags |= I915_EXEC_GEN7_SOL_RESET; - - /* Set statebuffer relocations */ - const unsigned state_index = batch->state.bo->index; - if (state_index < batch->exec_count && - batch->exec_bos[state_index] == batch->state.bo) { - struct drm_i915_gem_exec_object2 *entry = - &batch->validation_list[state_index]; - assert(entry->handle == batch->state.bo->gem_handle); - entry->relocation_count = batch->state_relocs.reloc_count; - entry->relocs_ptr = (uintptr_t) batch->state_relocs.relocs; - } - - /* Set batchbuffer relocations */ - struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0]; - assert(entry->handle == batch->batch.bo->gem_handle); - entry->relocation_count = batch->batch_relocs.reloc_count; - entry->relocs_ptr = (uintptr_t) batch->batch_relocs.relocs; - - if (batch->use_batch_first) { - flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT; - } else { - /* Move the batch to the end of the validation list */ - struct drm_i915_gem_exec_object2 tmp; - struct brw_bo *tmp_bo; - const unsigned index = batch->exec_count - 1; - - tmp = *entry; - *entry = batch->validation_list[index]; - batch->validation_list[index] = tmp; - - tmp_bo = batch->exec_bos[0]; - batch->exec_bos[0] = batch->exec_bos[index]; - batch->exec_bos[index] = tmp_bo; - } - - ret = execbuffer(brw->screen->fd, batch, brw->hw_ctx, - 4 * USED_BATCH(*batch), - in_fence_fd, out_fence_fd, flags); - - throttle(brw); - } - - if (INTEL_DEBUG(DEBUG_BATCH)) { - intel_print_batch(&batch->decoder, batch->batch.map, - 4 * USED_BATCH(*batch), - batch->batch.bo->gtt_offset, false); - } - - if (brw->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB) - brw_check_for_reset(brw); - - if (ret != 0) { - fprintf(stderr, "i965: Failed to submit batchbuffer: %s\n", - strerror(-ret)); - abort(); - } - - return ret; -} - -/** - * The in_fence_fd is ignored if -1. Otherwise this function takes ownership - * of the fd. - * - * The out_fence_fd is ignored if NULL. Otherwise, the caller takes ownership - * of the returned fd. - */ -int -_brw_batch_flush_fence(struct brw_context *brw, - int in_fence_fd, int *out_fence_fd, - const char *file, int line) -{ - int ret; - - if (USED_BATCH(brw->batch) == 0 && !brw->batch.contains_fence_signal) - return 0; - - /* Check that we didn't just wrap our batchbuffer at a bad time. 
*/
-   assert(!brw->batch.no_wrap);
-
-   brw_finish_batch(brw);
-   brw_upload_finish(&brw->upload);
-
-   finish_growing_bos(&brw->batch.batch);
-   finish_growing_bos(&brw->batch.state);
-
-   if (brw->throttle_batch[0] == NULL) {
-      brw->throttle_batch[0] = brw->batch.batch.bo;
-      brw_bo_reference(brw->throttle_batch[0]);
-   }
-
-   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) {
-      int bytes_for_commands = 4 * USED_BATCH(brw->batch);
-      int bytes_for_state = brw->batch.state_used;
-      fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
-              " %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture),"
-              " %4d batch relocs, %4d state relocs\n", file, line,
-              bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
-              bytes_for_state, 100.0f * bytes_for_state / STATE_SZ,
-              brw->batch.exec_count,
-              (float) (brw->batch.aperture_space / (1024 * 1024)),
-              brw->batch.batch_relocs.reloc_count,
-              brw->batch.state_relocs.reloc_count);
-
-      dump_validation_list(&brw->batch);
-   }
-
-   ret = submit_batch(brw, in_fence_fd, out_fence_fd);
-
-   if (INTEL_DEBUG(DEBUG_SYNC)) {
-      fprintf(stderr, "waiting for idle\n");
-      brw_bo_wait_rendering(brw->batch.batch.bo);
-   }
-
-   /* Start a new batch buffer. */
-   brw_new_batch(brw);
-
-   return ret;
-}
-
-void
-brw_batch_maybe_noop(struct brw_context *brw)
-{
-   if (!brw->frontend_noop || USED_BATCH(brw->batch) != 0)
-      return;
-
-   BEGIN_BATCH(1);
-   OUT_BATCH(MI_BATCH_BUFFER_END);
-   ADVANCE_BATCH();
-}
-
-bool
-brw_batch_references(struct brw_batch *batch, struct brw_bo *bo)
-{
-   unsigned index = READ_ONCE(bo->index);
-   if (index < batch->exec_count && batch->exec_bos[index] == bo)
-      return true;
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      if (batch->exec_bos[i] == bo)
-         return true;
-   }
-   return false;
-}
-
-/* This is the only way buffers get added to the validate list.
- */
-static uint64_t
-emit_reloc(struct brw_batch *batch,
-           struct brw_reloc_list *rlist, uint32_t offset,
-           struct brw_bo *target, int32_t target_offset,
-           unsigned int reloc_flags)
-{
-   assert(target != NULL);
-
-   if (target->kflags & EXEC_OBJECT_PINNED) {
-      brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
-      return intel_canonical_address(target->gtt_offset + target_offset);
-   }
-
-   unsigned int index = add_exec_bo(batch, target);
-   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
-
-   if (rlist->reloc_count == rlist->reloc_array_size) {
-      rlist->reloc_array_size *= 2;
-      rlist->relocs = realloc(rlist->relocs,
-                              rlist->reloc_array_size *
-                              sizeof(struct drm_i915_gem_relocation_entry));
-   }
-
-   if (reloc_flags & RELOC_32BIT) {
-      /* Restrict this buffer to the low 32 bits of the address space.
-       *
-       * Altering the validation list flags restricts it for this batch,
-       * but we also alter the BO's kflags to restrict it permanently
-       * (until the BO is destroyed and put back in the cache). Buffers
-       * may stay bound across batches, and we want to keep it constrained.
-       */
-      target->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-      entry->flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
-      /* RELOC_32BIT is not an EXEC_OBJECT_* flag, so get rid of it. */
-      reloc_flags &= ~RELOC_32BIT;
-   }
-
-   if (reloc_flags)
-      entry->flags |= reloc_flags & batch->valid_reloc_flags;
-
-   rlist->relocs[rlist->reloc_count++] =
-      (struct drm_i915_gem_relocation_entry) {
-         .offset = offset,
-         .delta = target_offset,
-         .target_handle = batch->use_batch_first ?
index : target->gem_handle, - .presumed_offset = entry->offset, - }; - - /* Using the old buffer offset, write in what the right data would be, in - * case the buffer doesn't move and we can short-circuit the relocation - * processing in the kernel - */ - return entry->offset + target_offset; -} - -void -brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo, - unsigned writable_flag) -{ - assert(bo->kflags & EXEC_OBJECT_PINNED); - assert((writable_flag & ~EXEC_OBJECT_WRITE) == 0); - - unsigned int index = add_exec_bo(batch, bo); - struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index]; - assert(entry->offset == bo->gtt_offset); - - if (writable_flag) - entry->flags |= EXEC_OBJECT_WRITE; -} - -uint64_t -brw_batch_reloc(struct brw_batch *batch, uint32_t batch_offset, - struct brw_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(batch_offset <= batch->batch.bo->size - sizeof(uint32_t)); - - return emit_reloc(batch, &batch->batch_relocs, batch_offset, - target, target_offset, reloc_flags); -} - -uint64_t -brw_state_reloc(struct brw_batch *batch, uint32_t state_offset, - struct brw_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(state_offset <= batch->state.bo->size - sizeof(uint32_t)); - - return emit_reloc(batch, &batch->state_relocs, state_offset, - target, target_offset, reloc_flags); -} - -/** - * Reserve some space in the statebuffer, or flush. - * - * This is used to estimate when we're near the end of the batch, - * so we can flush early. - */ -void -brw_require_statebuffer_space(struct brw_context *brw, int size) -{ - if (brw->batch.state_used + size >= STATE_SZ) - brw_batch_flush(brw); -} - -/** - * Allocates a block of space in the batchbuffer for indirect state. - */ -void * -brw_state_batch(struct brw_context *brw, - int size, - int alignment, - uint32_t *out_offset) -{ - struct brw_batch *batch = &brw->batch; - - assert(size < batch->state.bo->size); - - uint32_t offset = ALIGN(batch->state_used, alignment); - - if (offset + size >= STATE_SZ && !batch->no_wrap) { - brw_batch_flush(brw); - offset = ALIGN(batch->state_used, alignment); - } else if (offset + size >= batch->state.bo->size) { - const unsigned new_size = - MIN2(batch->state.bo->size + batch->state.bo->size / 2, - MAX_STATE_SIZE); - grow_buffer(brw, &batch->state, batch->state_used, new_size); - assert(offset + size < batch->state.bo->size); - } - - if (INTEL_DEBUG(DEBUG_BATCH)) { - _mesa_hash_table_u64_insert(batch->state_batch_sizes, - offset, (void *) (uintptr_t) size); - } - - batch->state_used = offset + size; - - *out_offset = offset; - return batch->state.map + (offset >> 2); -} - -void -brw_batch_data(struct brw_context *brw, - const void *data, GLuint bytes) -{ - assert((bytes & 3) == 0); - brw_batch_require_space(brw, bytes); - memcpy(brw->batch.map_next, data, bytes); - brw->batch.map_next += bytes >> 2; -} - -static void -load_sized_register_mem(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset, - int size) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int i; - - /* MI_LOAD_REGISTER_MEM only exists on Gfx7+. 
*/ - assert(devinfo->ver >= 7); - - if (devinfo->ver >= 8) { - BEGIN_BATCH(4 * size); - for (i = 0; i < size; i++) { - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg + i * 4); - OUT_RELOC64(bo, 0, offset + i * 4); - } - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(3 * size); - for (i = 0; i < size; i++) { - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg + i * 4); - OUT_RELOC(bo, 0, offset + i * 4); - } - ADVANCE_BATCH(); - } -} - -void -brw_load_register_mem(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset) -{ - load_sized_register_mem(brw, reg, bo, offset, 1); -} - -void -brw_load_register_mem64(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset) -{ - load_sized_register_mem(brw, reg, bo, offset, 2); -} - -/* - * Write an arbitrary 32-bit register to a buffer via MI_STORE_REGISTER_MEM. - */ -void -brw_store_register_mem32(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 6); - - if (devinfo->ver >= 8) { - BEGIN_BATCH(4); - OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg); - OUT_RELOC64(bo, RELOC_WRITE, offset); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg); - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset); - ADVANCE_BATCH(); - } -} - -/* - * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM. - */ -void -brw_store_register_mem64(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 6); - - /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to - * read a full 64-bit register, we need to do two of them. - */ - if (devinfo->ver >= 8) { - BEGIN_BATCH(8); - OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg); - OUT_RELOC64(bo, RELOC_WRITE, offset); - OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg + sizeof(uint32_t)); - OUT_RELOC64(bo, RELOC_WRITE, offset + sizeof(uint32_t)); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(6); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg); - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg + sizeof(uint32_t)); - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + sizeof(uint32_t)); - ADVANCE_BATCH(); - } -} - -/* - * Write a 32-bit register using immediate data. - */ -void -brw_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm) -{ - assert(brw->screen->devinfo.ver >= 6); - - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(reg); - OUT_BATCH(imm); - ADVANCE_BATCH(); -} - -/* - * Write a 64-bit register using immediate data. - */ -void -brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm) -{ - assert(brw->screen->devinfo.ver >= 6); - - BEGIN_BATCH(5); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2)); - OUT_BATCH(reg); - OUT_BATCH(imm & 0xffffffff); - OUT_BATCH(reg + 4); - OUT_BATCH(imm >> 32); - ADVANCE_BATCH(); -} - -/* - * Copies a 32-bit register. - */ -void -brw_load_register_reg(struct brw_context *brw, uint32_t dest, uint32_t src) -{ - assert(brw->screen->devinfo.verx10 >= 75); - - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2)); - OUT_BATCH(src); - OUT_BATCH(dest); - ADVANCE_BATCH(); -} - -/* - * Copies a 64-bit register. 
- */
-void
-brw_load_register_reg64(struct brw_context *brw, uint32_t dest, uint32_t src)
-{
-   assert(brw->screen->devinfo.verx10 >= 75);
-
-   BEGIN_BATCH(6);
-   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
-   OUT_BATCH(src);
-   OUT_BATCH(dest);
-   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
-   OUT_BATCH(src + sizeof(uint32_t));
-   OUT_BATCH(dest + sizeof(uint32_t));
-   ADVANCE_BATCH();
-}
-
-/*
- * Write 32-bits of immediate data to a GPU memory buffer.
- */
-void
-brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo,
-                     uint32_t offset, uint32_t imm)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver >= 6);
-
-   BEGIN_BATCH(4);
-   OUT_BATCH(MI_STORE_DATA_IMM | (4 - 2));
-   if (devinfo->ver >= 8)
-      OUT_RELOC64(bo, RELOC_WRITE, offset);
-   else {
-      OUT_BATCH(0); /* MBZ */
-      OUT_RELOC(bo, RELOC_WRITE, offset);
-   }
-   OUT_BATCH(imm);
-   ADVANCE_BATCH();
-}
-
-/*
- * Write 64-bits of immediate data to a GPU memory buffer.
- */
-void
-brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo,
-                     uint32_t offset, uint64_t imm)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver >= 6);
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(MI_STORE_DATA_IMM | (5 - 2));
-   if (devinfo->ver >= 8)
-      OUT_RELOC64(bo, RELOC_WRITE, offset);
-   else {
-      OUT_BATCH(0); /* MBZ */
-      OUT_RELOC(bo, RELOC_WRITE, offset);
-   }
-   OUT_BATCH(imm & 0xffffffffu);
-   OUT_BATCH(imm >> 32);
-   ADVANCE_BATCH();
-}
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
deleted file mode 100644
index 39b383b..0000000
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ /dev/null
@@ -1,166 +0,0 @@
-#ifndef BRW_BATCH_H
-#define BRW_BATCH_H
-
-#include "main/mtypes.h"
-
-#include "brw_context.h"
-#include "brw_bufmgr.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* The kernel assumes batchbuffers are smaller than 256kB. */
-#define MAX_BATCH_SIZE (256 * 1024)
-
-/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
- * Address, which means that we can't put binding tables beyond 64kB. This
- * effectively limits the maximum statebuffer size to 64kB.
- */
-#define MAX_STATE_SIZE (64 * 1024)
-
-struct brw_batch;
-
-void brw_batch_init(struct brw_context *brw);
-void brw_batch_free(struct brw_batch *batch);
-void brw_batch_save_state(struct brw_context *brw);
-bool brw_batch_saved_state_is_empty(struct brw_context *brw);
-void brw_batch_reset_to_saved(struct brw_context *brw);
-void brw_batch_require_space(struct brw_context *brw, GLuint sz);
-int _brw_batch_flush_fence(struct brw_context *brw,
-                           int in_fence_fd, int *out_fence_fd,
-                           const char *file, int line);
-void brw_batch_maybe_noop(struct brw_context *brw);
-
-#define brw_batch_flush(brw) \
-   _brw_batch_flush_fence((brw), -1, NULL, __FILE__, __LINE__)
-
-#define brw_batch_flush_fence(brw, in_fence_fd, out_fence_fd) \
-   _brw_batch_flush_fence((brw), (in_fence_fd), (out_fence_fd), \
-                          __FILE__, __LINE__)
-
-/* Unlike bmBufferData, this currently requires that the buffer be mapped.
- * Consider it a convenience function wrapping multiple
- * brw_buffer_dword() calls.
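As a usage note for the flush macros just declared, a hedged sketch of the fence-fd flow (hypothetical caller; sync_wait() is assumed from libsync, and the fd ownership rules are the ones documented at _brw_batch_flush_fence() above):

   /* Flush with no input fence; on success the caller owns out_fence_fd
    * and must close() it once it is done waiting. */
   int out_fence_fd = -1;
   if (brw_batch_flush_fence(brw, -1, &out_fence_fd) == 0 &&
       out_fence_fd >= 0) {
      sync_wait(out_fence_fd, -1); /* block until the batch retires */
      close(out_fence_fd);
   }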
- */ -void brw_batch_data(struct brw_context *brw, - const void *data, GLuint bytes); - -static inline bool -brw_batch_has_aperture_space(struct brw_context *brw, uint64_t extra_space) -{ - return brw->batch.aperture_space + extra_space <= - brw->screen->aperture_threshold; -} - -bool brw_batch_references(struct brw_batch *batch, struct brw_bo *bo); - -#define RELOC_WRITE EXEC_OBJECT_WRITE -#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT -/* Inverted meaning, but using the same bit...emit_reloc will flip it. */ -#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS - -void brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo, - unsigned writeable_flag); - -uint64_t brw_batch_reloc(struct brw_batch *batch, - uint32_t batch_offset, - struct brw_bo *target, - uint32_t target_offset, - unsigned flags); -uint64_t brw_state_reloc(struct brw_batch *batch, - uint32_t batch_offset, - struct brw_bo *target, - uint32_t target_offset, - unsigned flags); - -#define USED_BATCH(_batch) \ - ((uintptr_t)((_batch).map_next - (_batch).batch.map)) - -static inline uint32_t float_as_int(float f) -{ - union { - float f; - uint32_t d; - } fi; - - fi.f = f; - return fi.d; -} - -static inline void -brw_batch_begin(struct brw_context *brw, int n) -{ - brw_batch_require_space(brw, n * 4); - -#ifdef DEBUG - brw->batch.emit = USED_BATCH(brw->batch); - brw->batch.total = n; -#endif -} - -static inline void -brw_batch_advance(struct brw_context *brw) -{ -#ifdef DEBUG - struct brw_batch *batch = &brw->batch; - unsigned int _n = USED_BATCH(*batch) - batch->emit; - assert(batch->total != 0); - if (_n != batch->total) { - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", - _n, batch->total); - abort(); - } - batch->total = 0; -#else - (void) brw; -#endif -} - -static inline bool -brw_ptr_in_state_buffer(struct brw_batch *batch, void *p) -{ - return (char *) p >= (char *) batch->state.map && - (char *) p < (char *) batch->state.map + batch->state.bo->size; -} - -#define BEGIN_BATCH(n) do { \ - brw_batch_begin(brw, (n)); \ - uint32_t *__map = brw->batch.map_next; \ - brw->batch.map_next += (n) - -#define BEGIN_BATCH_BLT(n) do { \ - assert(brw->screen->devinfo.ver < 6); \ - brw_batch_begin(brw, (n)); \ - uint32_t *__map = brw->batch.map_next; \ - brw->batch.map_next += (n) - -#define OUT_BATCH(d) *__map++ = (d) -#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f))) - -#define OUT_RELOC(buf, flags, delta) do { \ - uint32_t __offset = (__map - brw->batch.batch.map) * 4; \ - uint32_t reloc = \ - brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ - OUT_BATCH(reloc); \ -} while (0) - -/* Handle 48-bit address relocations for Gfx8+ */ -#define OUT_RELOC64(buf, flags, delta) do { \ - uint32_t __offset = (__map - brw->batch.batch.map) * 4; \ - uint64_t reloc64 = \ - brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ - OUT_BATCH(reloc64); \ - OUT_BATCH(reloc64 >> 32); \ -} while (0) - -#define ADVANCE_BATCH() \ - assert(__map == brw->batch.map_next); \ - brw_batch_advance(brw); \ -} while (0) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c deleted file mode 100644 index 8ecdcc5..0000000 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the 
Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * \file brw_binding_tables.c - * - * State atoms which upload the "binding table" for each shader stage. - * - * Binding tables map a numeric "surface index" to the SURFACE_STATE structure - * for a currently bound surface. This allows SEND messages (such as sampler - * or data port messages) to refer to a particular surface by number, rather - * than by pointer. - * - * The binding table is stored as a (sparse) array of SURFACE_STATE entries; - * surface indexes are simply indexes into the array. The ordering of the - * entries is entirely left up to software; see the SURF_INDEX_* macros in - * brw_context.h to see our current layout. - */ - -#include "main/mtypes.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_batch.h" - -/** - * Upload a shader stage's binding table as indirect state. - * - * This copies brw_stage_state::surf_offset[] into the indirect state section - * of the batchbuffer (allocated by brw_state_batch()). - */ -void -brw_upload_binding_table(struct brw_context *brw, - uint32_t packet_name, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (prog_data->binding_table.size_bytes == 0) { - /* There are no surfaces; skip making the binding table altogether. */ - if (stage_state->bind_bo_offset == 0 && devinfo->ver < 9) - return; - - stage_state->bind_bo_offset = 0; - } else { - /* Upload a new binding table. */ - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - brw_emit_buffer_surface_state( - brw, &stage_state->surf_offset[ - prog_data->binding_table.shader_time_start], - brw->shader_time.bo, 0, ISL_FORMAT_RAW, - brw->shader_time.bo->size, 1, RELOC_WRITE); - } - uint32_t *bind = - brw_state_batch(brw, prog_data->binding_table.size_bytes, - 32, &stage_state->bind_bo_offset); - - /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ - memcpy(bind, stage_state->surf_offset, - prog_data->binding_table.size_bytes); - } - - brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; - - if (devinfo->ver >= 7) { - BEGIN_BATCH(2); - OUT_BATCH(packet_name << 16 | (2 - 2)); - /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field - * when hw-generated binding table is enabled. - */ - OUT_BATCH(stage_state->bind_bo_offset); - ADVANCE_BATCH(); - } -} - -/** - * State atoms which upload the binding table for a particular shader stage. - * @{ - */ - -/** Upload the VS binding table. 
*/ -static void -brw_vs_upload_binding_table(struct brw_context *brw) -{ - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_VS, - prog_data, - &brw->vs.base); -} - -const struct brw_tracked_state brw_vs_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VS_CONSTBUF | - BRW_NEW_VS_PROG_DATA | - BRW_NEW_SURFACES, - }, - .emit = brw_vs_upload_binding_table, -}; - - -/** Upload the PS binding table. */ -static void -brw_upload_wm_binding_table(struct brw_context *brw) -{ - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_PS, - prog_data, - &brw->wm.base); -} - -const struct brw_tracked_state brw_wm_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_SURFACES, - }, - .emit = brw_upload_wm_binding_table, -}; - -/** Upload the TCS binding table (if tessellation stages are active). */ -static void -brw_tcs_upload_binding_table(struct brw_context *brw) -{ - /* Skip if the tessellation stages are disabled. */ - if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL) - return; - - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_HS, - prog_data, - &brw->tcs.base); -} - -const struct brw_tracked_state brw_tcs_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_DEFAULT_TESS_LEVELS | - BRW_NEW_SURFACES | - BRW_NEW_TCS_CONSTBUF | - BRW_NEW_TCS_PROG_DATA, - }, - .emit = brw_tcs_upload_binding_table, -}; - -/** Upload the TES binding table (if TES is active). */ -static void -brw_tes_upload_binding_table(struct brw_context *brw) -{ - /* If there's no TES, skip changing anything. */ - if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL) - return; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_DS, - prog_data, - &brw->tes.base); -} - -const struct brw_tracked_state brw_tes_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_SURFACES | - BRW_NEW_TES_CONSTBUF | - BRW_NEW_TES_PROG_DATA, - }, - .emit = brw_tes_upload_binding_table, -}; - -/** Upload the GS binding table (if GS is active). */ -static void -brw_gs_upload_binding_table(struct brw_context *brw) -{ - /* If there's no GS, skip changing anything. */ - if (brw->programs[MESA_SHADER_GEOMETRY] == NULL) - return; - - /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_GS, - prog_data, - &brw->gs.base); -} - -const struct brw_tracked_state brw_gs_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GS_CONSTBUF | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_SURFACES, - }, - .emit = brw_gs_upload_binding_table, -}; -/** @} */ - -/** - * State atoms which emit 3DSTATE packets to update the binding table pointers. - * @{ - */ - -/** - * (Gfx4-5) Upload the binding table pointers for all shader stages. - * - * The binding table pointers are relative to the surface state base address, - * which points at the batchbuffer containing the streamed batch state. 
- */ -static void -gfx4_upload_binding_table_pointers(struct brw_context *brw) -{ - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2)); - OUT_BATCH(brw->vs.base.bind_bo_offset); - OUT_BATCH(0); /* gs */ - OUT_BATCH(0); /* clip */ - OUT_BATCH(0); /* sf */ - OUT_BATCH(brw->wm.base.bind_bo_offset); - ADVANCE_BATCH(); -} - -const struct brw_tracked_state brw_binding_table_pointers = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_BINDING_TABLE_POINTERS | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = gfx4_upload_binding_table_pointers, -}; - -/** - * (Sandybridge Only) Upload the binding table pointers for all shader stages. - * - * The binding table pointers are relative to the surface state base address, - * which points at the batchbuffer containing the streamed batch state. - */ -static void -gfx6_upload_binding_table_pointers(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | - GFX6_BINDING_TABLE_MODIFY_VS | - GFX6_BINDING_TABLE_MODIFY_GS | - GFX6_BINDING_TABLE_MODIFY_PS | - (4 - 2)); - OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */ - if (brw->ff_gs.prog_active) - OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */ - else - OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */ - OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */ - ADVANCE_BATCH(); -} - -const struct brw_tracked_state gfx6_binding_table_pointers = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_BINDING_TABLE_POINTERS | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = gfx6_upload_binding_table_pointers, -}; - -/** @} */ diff --git a/src/mesa/drivers/dri/i965/brw_blit.c b/src/mesa/drivers/dri/i965/brw_blit.c deleted file mode 100644 index 95f00e9..0000000 --- a/src/mesa/drivers/dri/i965/brw_blit.c +++ /dev/null @@ -1,790 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/mtypes.h" -#include "main/blit.h" -#include "main/context.h" -#include "main/enums.h" -#include "main/fbobject.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_blit.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_batch.h" -#include "brw_mipmap_tree.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -static void -brw_miptree_set_alpha_to_one(struct brw_context *brw, - struct brw_mipmap_tree *mt, - int x, int y, int width, int height); - -static GLuint translate_raster_op(enum gl_logicop_mode logicop) -{ - return logicop | (logicop << 4); -} - -static uint32_t -br13_for_cpp(int cpp) -{ - switch (cpp) { - case 16: - return BR13_32323232; - case 8: - return BR13_16161616; - case 4: - return BR13_8888; - case 2: - return BR13_565; - case 1: - return BR13_8; - default: - unreachable("not reached"); - } -} - -/** - * Emits the packet for switching the blitter from X to Y tiled or back. - * - * This has to be called in a single BEGIN_BATCH_BLT_TILED() / - * ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as - * part of the power context, not a render context, and if the batchbuffer was - * to get flushed between setting and blitting, or blitting and restoring, our - * tiling state would leak into other unsuspecting applications (like the X - * server). - */ -static uint32_t * -set_blitter_tiling(struct brw_context *brw, - bool dst_y_tiled, bool src_y_tiled, - uint32_t *__map) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const unsigned n_dwords = devinfo->ver >= 8 ? 5 : 4; - assert(devinfo->ver >= 6); - - /* Idle the blitter before we update how tiling is interpreted. */ - OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - if (n_dwords == 5) - OUT_BATCH(0); - - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(BCS_SWCTRL); - OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | - (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) | - (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0)); - return __map; -} -#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map) - -#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \ - unsigned set_tiling_batch_size = 0; \ - if (dst_y_tiled || src_y_tiled) { \ - if (devinfo->ver >= 8) \ - set_tiling_batch_size = 16; \ - else \ - set_tiling_batch_size = 14; \ - } \ - BEGIN_BATCH_BLT(n + set_tiling_batch_size); \ - if (dst_y_tiled || src_y_tiled) \ - SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled) - -#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \ - if (dst_y_tiled || src_y_tiled) \ - SET_BLITTER_TILING(brw, false, false); \ - ADVANCE_BATCH() - -bool -brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst) -{ - /* The BLT doesn't handle sRGB conversion */ - assert(src == _mesa_get_srgb_format_linear(src)); - assert(dst == _mesa_get_srgb_format_linear(dst)); - - /* No swizzle or format conversions possible, except... 
*/ - if (src == dst) - return true; - - /* ...we can either discard the alpha channel when going from A->X, - * or we can fill the alpha channel with 0xff when going from X->A - */ - if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM) - return (dst == MESA_FORMAT_B8G8R8A8_UNORM || - dst == MESA_FORMAT_B8G8R8X8_UNORM); - - if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM) - return (dst == MESA_FORMAT_R8G8B8A8_UNORM || - dst == MESA_FORMAT_R8G8B8X8_UNORM); - - /* We can also discard alpha when going from A2->X2 for 2 bit alpha, - * however we can't fill the alpha channel with two 1 bits when going - * from X2->A2, because brw_miptree_set_alpha_to_one() is not yet - * ready for this / can only handle 8 bit alpha. - */ - if (src == MESA_FORMAT_B10G10R10A2_UNORM) - return (dst == MESA_FORMAT_B10G10R10A2_UNORM || - dst == MESA_FORMAT_B10G10R10X2_UNORM); - - if (src == MESA_FORMAT_R10G10B10A2_UNORM) - return (dst == MESA_FORMAT_R10G10B10A2_UNORM || - dst == MESA_FORMAT_R10G10B10X2_UNORM); - - return false; -} - -static void -get_blit_intratile_offset_el(const struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t total_x_offset_el, - uint32_t total_y_offset_el, - uint64_t *tile_offset_B, - uint32_t *x_offset_el, - uint32_t *y_offset_el) -{ - ASSERTED uint32_t z_offset_el, array_offset; - isl_tiling_get_intratile_offset_el(mt->surf.tiling, mt->surf.dim, - mt->surf.msaa_layout, - mt->cpp * 8, mt->surf.samples, - mt->surf.row_pitch_B, - mt->surf.array_pitch_el_rows, - total_x_offset_el, total_y_offset_el, 0, 0, - tile_offset_B, - x_offset_el, y_offset_el, - &z_offset_el, &array_offset); - assert(z_offset_el == 0); - assert(array_offset == 0); - - if (mt->surf.tiling == ISL_TILING_LINEAR) { - /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress: - * - * "Base address of the destination surface: X=0, Y=0. Lower 32bits - * of the 48bit addressing. When Src Tiling is enabled (Bit_15 - * enabled), this address must be 4KB-aligned. When Tiling is not - * enabled, this address should be CL (64byte) aligned." - * - * The offsets we get from ISL in the tiled case are already aligned. - * In the linear case, we need to do some of our own aligning. - */ - uint32_t delta = *tile_offset_B & 63; - assert(delta % mt->cpp == 0); - *tile_offset_B -= delta; - *x_offset_el += delta / mt->cpp; - } else { - assert(*tile_offset_B % 4096 == 0); - } -} - -static bool -alignment_valid(struct brw_context *brw, unsigned offset, - enum isl_tiling tiling) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Tiled buffers must be page-aligned (4K). */ - if (tiling != ISL_TILING_LINEAR) - return (offset & 4095) == 0; - - /* On Gfx8+, linear buffers must be cacheline-aligned. 
*/ - if (devinfo->ver >= 8) - return (offset & 63) == 0; - - return true; -} - -static uint32_t -xy_blit_cmd(enum isl_tiling src_tiling, enum isl_tiling dst_tiling, - uint32_t cpp) -{ - uint32_t CMD = 0; - - assert(cpp <= 4); - switch (cpp) { - case 1: - case 2: - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - unreachable("not reached"); - } - - if (dst_tiling != ISL_TILING_LINEAR) - CMD |= XY_DST_TILED; - - if (src_tiling != ISL_TILING_LINEAR) - CMD |= XY_SRC_TILED; - - return CMD; -} - -/* Copy BitBlt - */ -static bool -emit_copy_blit(struct brw_context *brw, - GLuint cpp, - int32_t src_pitch, - struct brw_bo *src_buffer, - GLuint src_offset, - enum isl_tiling src_tiling, - int32_t dst_pitch, - struct brw_bo *dst_buffer, - GLuint dst_offset, - enum isl_tiling dst_tiling, - GLshort src_x, GLshort src_y, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h, - enum gl_logicop_mode logic_op) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - GLuint CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - bool dst_y_tiled = dst_tiling == ISL_TILING_Y0; - bool src_y_tiled = src_tiling == ISL_TILING_Y0; - uint32_t src_tile_w, src_tile_h; - uint32_t dst_tile_w, dst_tile_h; - - if ((dst_y_tiled || src_y_tiled) && devinfo->ver < 6) - return false; - - const unsigned bo_sizes = dst_buffer->size + src_buffer->size; - - /* do space check before going any further */ - if (!brw_batch_has_aperture_space(brw, bo_sizes)) - brw_batch_flush(brw); - - if (!brw_batch_has_aperture_space(brw, bo_sizes)) - return false; - - unsigned length = devinfo->ver >= 8 ? 10 : 8; - - brw_batch_require_space(brw, length * 4); - DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __func__, - src_buffer, src_pitch, src_offset, src_x, src_y, - dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - - isl_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h); - isl_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h); - - /* For Tiled surfaces, the pitch has to be a multiple of the Tile width - * (X direction width of the Tile). This is ensured while allocating the - * buffer object. - */ - assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0); - assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0); - - /* For big formats (such as floating point), do the copy using 16 or - * 32bpp and multiply the coordinates. - */ - if (cpp > 4) { - if (cpp % 4 == 2) { - dst_x *= cpp / 2; - dst_x2 *= cpp / 2; - src_x *= cpp / 2; - cpp = 2; - } else { - assert(cpp % 4 == 0); - dst_x *= cpp / 4; - dst_x2 *= cpp / 4; - src_x *= cpp / 4; - cpp = 4; - } - } - - if (!alignment_valid(brw, dst_offset, dst_tiling)) - return false; - if (!alignment_valid(brw, src_offset, src_tiling)) - return false; - - /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop - * the low bits. Offsets must be naturally aligned. - */ - if (src_pitch % 4 != 0 || src_offset % cpp != 0 || - dst_pitch % 4 != 0 || dst_offset % cpp != 0) - return false; - - assert(cpp <= 4); - BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; - - CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp); - - /* For tiled source and destination, pitch value should be specified - * as a number of Dwords. 
- */ - if (dst_tiling != ISL_TILING_LINEAR) - dst_pitch /= 4; - - if (src_tiling != ISL_TILING_LINEAR) - src_pitch /= 4; - - if (dst_y2 <= dst_y || dst_x2 <= dst_x) - return true; - - assert(dst_x < dst_x2); - assert(dst_y < dst_y2); - - BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled); - OUT_BATCH(CMD | (length - 2)); - OUT_BATCH(BR13 | (uint16_t)dst_pitch); - OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X)); - OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X)); - if (devinfo->ver >= 8) { - OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); - } else { - OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); - } - OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X)); - OUT_BATCH((uint16_t)src_pitch); - if (devinfo->ver >= 8) { - OUT_RELOC64(src_buffer, 0, src_offset); - } else { - OUT_RELOC(src_buffer, 0, src_offset); - } - - ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); - - brw_emit_mi_flush(brw); - - return true; -} - -static bool -emit_miptree_blit(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - uint32_t src_x, uint32_t src_y, - struct brw_mipmap_tree *dst_mt, - uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height, - bool reverse, enum gl_logicop_mode logicop) -{ - /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics - * Data Size Limitations): - * - * The BLT engine is capable of transferring very large quantities of - * graphics data. Any graphics data read from and written to the - * destination is permitted to represent a number of pixels that - * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line - * at the destination. The maximum number of pixels that may be - * represented per scan line’s worth of graphics data depends on the - * color depth. - * - * The blitter's pitch is a signed 16-bit integer, but measured in bytes - * for linear surfaces and DWords for tiled surfaces. So the maximum - * pitch is 32k linear and 128k tiled. - */ - if (brw_miptree_blt_pitch(src_mt) >= 32768 || - brw_miptree_blt_pitch(dst_mt) >= 32768) { - perf_debug("Falling back due to >= 32k/128k pitch\n"); - return false; - } - - /* We need to split the blit into chunks that each fit within the blitter's - * restrictions. We can't use a chunk size of 32768 because we need to - * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's - * a nice round power of two, big enough that performance won't suffer, and - * small enough to guarantee everything fits. - */ - const uint32_t max_chunk_size = 16384; - - for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { - for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { - const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); - const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); - - uint64_t src_offset; - uint32_t src_tile_x, src_tile_y; - get_blit_intratile_offset_el(brw, src_mt, - src_x + chunk_x, src_y + chunk_y, - &src_offset, &src_tile_x, &src_tile_y); - - uint64_t dst_offset; - uint32_t dst_tile_x, dst_tile_y; - get_blit_intratile_offset_el(brw, dst_mt, - dst_x + chunk_x, dst_y + chunk_y, - &dst_offset, &dst_tile_x, &dst_tile_y); - - if (!emit_copy_blit(brw, - src_mt->cpp, - reverse ? 
-src_mt->surf.row_pitch_B : - src_mt->surf.row_pitch_B, - src_mt->bo, src_mt->offset + src_offset, - src_mt->surf.tiling, - dst_mt->surf.row_pitch_B, - dst_mt->bo, dst_mt->offset + dst_offset, - dst_mt->surf.tiling, - src_tile_x, src_tile_y, - dst_tile_x, dst_tile_y, - chunk_w, chunk_h, - logicop)) { - /* If this is ever going to fail, it will fail on the first chunk */ - assert(chunk_x == 0 && chunk_y == 0); - return false; - } - } - } - - return true; -} - -/** - * Implements a rectangular block transfer (blit) of pixels between two - * miptrees. - * - * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous, - * but limited, pitches and sizes allowed. - * - * The src/dst coordinates are relative to the given level/slice of the - * miptree. - * - * If @src_flip or @dst_flip is set, then the rectangle within that miptree - * will be inverted (including scanline order) when copying. This is common - * in GL when copying between window system and user-created - * renderbuffers/textures. - */ -bool -brw_miptree_blit(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, bool src_flip, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, bool dst_flip, - uint32_t width, uint32_t height, - enum gl_logicop_mode logicop) -{ - /* The blitter doesn't understand multisampling at all. */ - if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) - return false; - - /* No sRGB decode or encode is done by the hardware blitter, which is - * consistent with what we want in many callers (glCopyTexSubImage(), - * texture validation, etc.). - */ - mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format); - mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format); - - /* The blitter doesn't support doing any format conversions. We do also - * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into - * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A - * channel to 1.0 at the end. Also trivially ARGB2101010 to XRGB2101010, - * but not XRGB2101010 to ARGB2101010 yet. - */ - if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) { - perf_debug("%s: Can't use hardware blitter from %s to %s, " - "falling back.\n", __func__, - _mesa_get_format_name(src_format), - _mesa_get_format_name(dst_format)); - return false; - } - - /* The blitter has no idea about HiZ or fast color clears, so we need to - * resolve the miptrees before we do anything. 
- */ - brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false); - brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); - - if (src_flip) { - const unsigned h0 = src_mt->surf.phys_level0_sa.height; - src_y = minify(h0, src_level - src_mt->first_level) - src_y - height; - } - - if (dst_flip) { - const unsigned h0 = dst_mt->surf.phys_level0_sa.height; - dst_y = minify(h0, dst_level - dst_mt->first_level) - dst_y - height; - } - - uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; - brw_miptree_get_image_offset(src_mt, src_level, src_slice, - &src_image_x, &src_image_y); - brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice, - &dst_image_x, &dst_image_y); - src_x += src_image_x; - src_y += src_image_y; - dst_x += dst_image_x; - dst_y += dst_image_y; - - if (!emit_miptree_blit(brw, src_mt, src_x, src_y, - dst_mt, dst_x, dst_y, width, height, - src_flip != dst_flip, logicop)) { - return false; - } - - /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */ - if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 && - _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) { - brw_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height); - } - - return true; -} - -bool -brw_miptree_copy(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, - uint32_t src_width, uint32_t src_height) -{ - /* The blitter doesn't understand multisampling at all. */ - if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) - return false; - - if (src_mt->format == MESA_FORMAT_S_UINT8) - return false; - - /* The blitter has no idea about HiZ or fast color clears, so we need to - * resolve the miptrees before we do anything. - */ - brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false); - brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); - - uint32_t src_image_x, src_image_y; - brw_miptree_get_image_offset(src_mt, src_level, src_slice, - &src_image_x, &src_image_y); - - if (_mesa_is_format_compressed(src_mt->format)) { - GLuint bw, bh; - _mesa_get_format_block_size(src_mt->format, &bw, &bh); - - /* Compressed textures need not have dimensions that are a multiple of - * the block size. Rectangles in compressed textures do need to be a - * multiple of the block size. The one exception is that the right and - * bottom edges may be at the right or bottom edge of the miplevel even - * if it's not aligned. 
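To make the block math below concrete, a worked example with hypothetical numbers, assuming a 4x4-block compressed format such as DXT1:

   /* A 10x6-pixel rectangle whose right/bottom edges coincide with the
    * miplevel edges: offsets must be block-aligned, sizes may round up. */
   const uint32_t bw = 4, bh = 4;
   uint32_t src_x = 8, src_y = 4;             /* block-aligned offsets      */
   uint32_t src_width = 10, src_height = 6;   /* not multiples of the block */
   src_x /= bw;                               /* -> 2 blocks */
   src_y /= bh;                               /* -> 1 block  */
   src_width = DIV_ROUND_UP(src_width, bw);   /* -> 3 blocks */
   src_height = DIV_ROUND_UP(src_height, bh); /* -> 2 blocks */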
- */ - assert(src_x % bw == 0); - assert(src_y % bh == 0); - - assert(src_width % bw == 0 || - src_x + src_width == - minify(src_mt->surf.logical_level0_px.width, src_level)); - assert(src_height % bh == 0 || - src_y + src_height == - minify(src_mt->surf.logical_level0_px.height, src_level)); - - src_x /= (int)bw; - src_y /= (int)bh; - src_width = DIV_ROUND_UP(src_width, (int)bw); - src_height = DIV_ROUND_UP(src_height, (int)bh); - } - src_x += src_image_x; - src_y += src_image_y; - - uint32_t dst_image_x, dst_image_y; - brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice, - &dst_image_x, &dst_image_y); - - if (_mesa_is_format_compressed(dst_mt->format)) { - GLuint bw, bh; - _mesa_get_format_block_size(dst_mt->format, &bw, &bh); - - assert(dst_x % bw == 0); - assert(dst_y % bh == 0); - - dst_x /= (int)bw; - dst_y /= (int)bh; - } - dst_x += dst_image_x; - dst_y += dst_image_y; - - return emit_miptree_blit(brw, src_mt, src_x, src_y, - dst_mt, dst_x, dst_y, - src_width, src_height, false, COLOR_LOGICOP_COPY); -} - -bool -brw_emit_immediate_color_expand_blit(struct brw_context *brw, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct brw_bo *dst_buffer, - GLuint dst_offset, - enum isl_tiling dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, - enum gl_logicop_mode logic_op) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int dwords = ALIGN(src_size, 8) / 4; - uint32_t opcode, br13, blit_cmd; - - if (dst_tiling != ISL_TILING_LINEAR) { - if (dst_offset & 4095) - return false; - if (dst_tiling == ISL_TILING_Y0) - return false; - } - - assert((unsigned) logic_op <= 0x0f); - assert(dst_pitch > 0); - - if (w < 0 || h < 0) - return true; - - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", - __func__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - - unsigned xy_setup_blt_length = devinfo->ver >= 8 ? 10 : 8; - brw_batch_require_space(brw, (xy_setup_blt_length * 4) + - (3 * 4) + dwords * 4); - - opcode = XY_SETUP_BLT_CMD; - if (cpp == 4) - opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - if (dst_tiling != ISL_TILING_LINEAR) { - opcode |= XY_DST_TILED; - dst_pitch /= 4; - } - - br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); - br13 |= br13_for_cpp(cpp); - - blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiling != ISL_TILING_LINEAR) - blit_cmd |= XY_DST_TILED; - - BEGIN_BATCH_BLT(xy_setup_blt_length + 3); - OUT_BATCH(opcode | (xy_setup_blt_length - 2)); - OUT_BATCH(br13); - OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ - OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - if (devinfo->ver >= 8) { - OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); - } else { - OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); - } - OUT_BATCH(0); /* bg */ - OUT_BATCH(fg_color); /* fg */ - OUT_BATCH(0); /* pattern base addr */ - if (devinfo->ver >= 8) - OUT_BATCH(0); - - OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); - OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X)); - OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X)); - ADVANCE_BATCH(); - - brw_batch_data(brw, src_bits, dwords * 4); - - brw_emit_mi_flush(brw); - - return true; -} - -/** - * Used to initialize the alpha value of an ARGB8888 miptree after copying - * into it from an XRGB8888 source. - * - * This is very common with glCopyTexImage2D(). Note that the coordinates are - * relative to the start of the miptree, not relative to a slice within the - * miptree. 
- */ -static void -brw_miptree_set_alpha_to_one(struct brw_context *brw, - struct brw_mipmap_tree *mt, - int x, int y, int width, int height) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t BR13, CMD; - int pitch, cpp; - - pitch = mt->surf.row_pitch_B; - cpp = mt->cpp; - - DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", - __func__, mt->bo, pitch, x, y, width, height); - - /* Note: Currently only handles 8 bit alpha channel. Extension to < 8 Bit - * alpha channel would be likely possible via ROP code 0xfa instead of 0xf0 - * and writing a suitable bit-mask instead of 0xffffffff. - */ - BR13 = br13_for_cpp(cpp) | 0xf0 << 16; - CMD = XY_COLOR_BLT_CMD; - CMD |= XY_BLT_WRITE_ALPHA; - - if (mt->surf.tiling != ISL_TILING_LINEAR) { - CMD |= XY_DST_TILED; - pitch /= 4; - } - BR13 |= pitch; - - /* do space check before going any further */ - if (!brw_batch_has_aperture_space(brw, mt->bo->size)) - brw_batch_flush(brw); - - unsigned length = devinfo->ver >= 8 ? 7 : 6; - const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0; - - /* We need to split the blit into chunks that each fit within the blitter's - * restrictions. We can't use a chunk size of 32768 because we need to - * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's - * a nice round power of two, big enough that performance won't suffer, and - * small enough to guarantee everything fits. - */ - const uint32_t max_chunk_size = 16384; - - for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { - for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { - const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); - const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); - - uint64_t offset_B; - uint32_t tile_x, tile_y; - get_blit_intratile_offset_el(brw, mt, - x + chunk_x, y + chunk_y, - &offset_B, &tile_x, &tile_y); - - BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false); - OUT_BATCH(CMD | (length - 2)); - OUT_BATCH(BR13); - OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) | - SET_FIELD(x + chunk_x, BLT_X)); - OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) | - SET_FIELD(x + chunk_x + chunk_w, BLT_X)); - if (devinfo->ver >= 8) { - OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset_B); - } else { - OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset_B); - } - OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ - ADVANCE_BATCH_TILED(dst_y_tiled, false); - } - } - - brw_emit_mi_flush(brw); -} diff --git a/src/mesa/drivers/dri/i965/brw_blit.h b/src/mesa/drivers/dri/i965/brw_blit.h deleted file mode 100644 index ab71420..0000000 --- a/src/mesa/drivers/dri/i965/brw_blit.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_BLIT_H -#define BRW_BLIT_H - -#include "brw_context.h" - -bool brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst); - -bool brw_miptree_blit(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, bool src_flip, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, bool dst_flip, - uint32_t width, uint32_t height, - enum gl_logicop_mode logicop); - -bool brw_miptree_copy(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, - uint32_t src_width, uint32_t src_height); - -bool -brw_emit_immediate_color_expand_blit(struct brw_context *brw, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct brw_bo *dst_buffer, - GLuint dst_offset, - enum isl_tiling dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, - enum gl_logicop_mode logic_op); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c deleted file mode 100644 index 1b8ec47..0000000 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ /dev/null @@ -1,1678 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "main/context.h" -#include "main/teximage.h" -#include "main/blend.h" -#include "main/bufferobj.h" -#include "main/enums.h" -#include "main/fbobject.h" -#include "main/image.h" -#include "main/renderbuffer.h" -#include "main/glformats.h" - -#include "brw_blorp.h" -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_meta_util.h" -#include "brw_state.h" -#include "brw_buffer_objects.h" -#include "brw_fbo.h" -#include "dev/intel_debug.h" - -#define FILE_DEBUG_FLAG DEBUG_BLORP - -static bool -brw_blorp_lookup_shader(struct blorp_batch *batch, - const void *key, uint32_t key_size, - uint32_t *kernel_out, void *prog_data_out) -{ - struct brw_context *brw = batch->driver_batch; - return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size, - kernel_out, prog_data_out, true); -} - -static bool -brw_blorp_upload_shader(struct blorp_batch *batch, uint32_t stage, - const void *key, uint32_t key_size, - const void *kernel, uint32_t kernel_size, - const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, - uint32_t *kernel_out, void *prog_data_out) -{ - struct brw_context *brw = batch->driver_batch; - brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size, - kernel, kernel_size, prog_data, prog_data_size, - kernel_out, prog_data_out); - return true; -} - -void -brw_blorp_init(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - blorp_init(&brw->blorp, brw, &brw->isl_dev, NULL); - - brw->blorp.compiler = brw->screen->compiler; - - switch (devinfo->ver) { - case 4: - if (devinfo->verx10 == 45) { - brw->blorp.exec = gfx45_blorp_exec; - } else { - brw->blorp.exec = gfx4_blorp_exec; - } - break; - case 5: - brw->blorp.exec = gfx5_blorp_exec; - break; - case 6: - brw->blorp.exec = gfx6_blorp_exec; - break; - case 7: - if (devinfo->verx10 == 75) { - brw->blorp.exec = gfx75_blorp_exec; - } else { - brw->blorp.exec = gfx7_blorp_exec; - } - break; - case 8: - brw->blorp.exec = gfx8_blorp_exec; - break; - case 9: - brw->blorp.exec = gfx9_blorp_exec; - break; - case 11: - brw->blorp.exec = gfx11_blorp_exec; - break; - - default: - unreachable("Invalid gen"); - } - - brw->blorp.lookup_shader = brw_blorp_lookup_shader; - brw->blorp.upload_shader = brw_blorp_upload_shader; -} - -static void -blorp_surf_for_miptree(struct brw_context *brw, - struct blorp_surf *surf, - const struct brw_mipmap_tree *mt, - enum isl_aux_usage aux_usage, - bool is_render_target, - unsigned *level, - unsigned start_layer, unsigned num_layers) -{ - if (mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) { - const unsigned num_samples = mt->surf.samples; - for (unsigned i = 0; i < num_layers; i++) { - for (unsigned s = 0; s < num_samples; s++) { - const unsigned phys_layer = (start_layer + i) * num_samples + s; - brw_miptree_check_level_layer(mt, *level, phys_layer); - } - } - } else { - for (unsigned i = 0; i < num_layers; i++) - brw_miptree_check_level_layer(mt, *level, start_layer + i); - } - - *surf = (struct blorp_surf) { - .surf = &mt->surf, - .addr = (struct blorp_address) { - .buffer = mt->bo, - .offset = mt->offset, - .reloc_flags = is_render_target ? 
EXEC_OBJECT_WRITE : 0, - .mocs = brw_mocs(&brw->isl_dev, mt->bo), - }, - .aux_usage = aux_usage, - .tile_x_sa = mt->level[*level].level_x, - .tile_y_sa = mt->level[*level].level_y, - }; - - if (surf->aux_usage == ISL_AUX_USAGE_HIZ && - !brw_miptree_level_has_hiz(mt, *level)) - surf->aux_usage = ISL_AUX_USAGE_NONE; - - if (surf->aux_usage != ISL_AUX_USAGE_NONE) { - /* We only really need a clear color if we also have an auxiliary - * surface. Without one, it does nothing. - */ - surf->clear_color = - brw_miptree_get_clear_color(mt, (struct brw_bo **) - &surf->clear_color_addr.buffer, - &surf->clear_color_addr.offset); - - surf->aux_surf = &mt->aux_buf->surf; - surf->aux_addr = (struct blorp_address) { - .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0, - .mocs = surf->addr.mocs, - }; - - surf->aux_addr.buffer = mt->aux_buf->bo; - surf->aux_addr.offset = mt->aux_buf->offset; - } else { - surf->aux_addr = (struct blorp_address) { - .buffer = NULL, - }; - memset(&surf->clear_color, 0, sizeof(surf->clear_color)); - } - assert((surf->aux_usage == ISL_AUX_USAGE_NONE) == - (surf->aux_addr.buffer == NULL)); - - if (!is_render_target && brw->screen->devinfo.ver == 9) - gfx9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage); - - /* ISL wants real levels, not offset ones. */ - *level -= mt->first_level; -} - -static bool -brw_blorp_supports_dst_format(struct brw_context *brw, mesa_format format) -{ - /* If it's renderable, it's definitely supported. */ - if (brw->mesa_format_supports_render[format]) - return true; - - /* BLORP can't compress anything */ - if (_mesa_is_format_compressed(format)) - return false; - - /* No exotic formats such as GL_LUMINANCE_ALPHA */ - if (_mesa_get_format_bits(format, GL_RED_BITS) == 0 && - _mesa_get_format_bits(format, GL_DEPTH_BITS) == 0 && - _mesa_get_format_bits(format, GL_STENCIL_BITS) == 0) - return false; - - return true; -} - -static enum isl_format -brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format, - bool is_render_target) -{ - switch (format) { - case MESA_FORMAT_NONE: - return ISL_FORMAT_UNSUPPORTED; - case MESA_FORMAT_S_UINT8: - return ISL_FORMAT_R8_UINT; - case MESA_FORMAT_Z24_UNORM_X8_UINT: - case MESA_FORMAT_Z24_UNORM_S8_UINT: - return ISL_FORMAT_R24_UNORM_X8_TYPELESS; - case MESA_FORMAT_Z_FLOAT32: - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - return ISL_FORMAT_R32_FLOAT; - case MESA_FORMAT_Z_UNORM16: - return ISL_FORMAT_R16_UNORM; - default: - if (is_render_target) { - assert(brw_blorp_supports_dst_format(brw, format)); - if (brw->mesa_format_supports_render[format]) { - return brw->mesa_to_isl_render_format[format]; - } else { - return brw_isl_format_for_mesa_format(format); - } - } else { - /* Some destinations (is_render_target == true) are supported by - * blorp even though we technically can't render to them. - */ - return brw_isl_format_for_mesa_format(format); - } - } -} - -/** - * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gfx7.5+ - * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are - * - * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE - * 0 1 2 3 4 5 - * 4 5 6 7 0 1 - * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE - * - * which is simply adding 4 then modding by 8 (or anding with 7). - * - * We then may need to apply workarounds for textureGather hardware bugs. 
- */ -static enum isl_channel_select -swizzle_to_scs(GLenum swizzle) -{ - return (enum isl_channel_select)((swizzle + 4) & 7); -} - -/** - * Note: if the src (or dst) is a 2D multisample array texture on Gfx7+ using - * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is - * the physical layer holding sample 0. So, for example, if - * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer == - * 4*n. - */ -void -brw_blorp_blit_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - mesa_format src_format, int src_swizzle, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer, - mesa_format dst_format, - float src_x0, float src_y0, - float src_x1, float src_y1, - float dst_x0, float dst_y0, - float dst_x1, float dst_y1, - GLenum gl_filter, bool mirror_x, bool mirror_y, - bool decode_srgb, bool encode_srgb) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f) " - "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n", - __func__, - src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt, - src_level, src_layer, src_x0, src_y0, src_x1, src_y1, - dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt, - dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1, - mirror_x, mirror_y); - - if (src_format == MESA_FORMAT_NONE) - src_format = src_mt->format; - - if (dst_format == MESA_FORMAT_NONE) - dst_format = dst_mt->format; - - if (!decode_srgb) - src_format = _mesa_get_srgb_format_linear(src_format); - - if (!encode_srgb) - dst_format = _mesa_get_srgb_format_linear(dst_format); - - /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F - * texture, the above code configures the source format for L32_FLOAT or - * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge, - * the SAMPLE message appears to handle multisampled L32_FLOAT and - * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work - * around the problem by using a source format of R32_FLOAT. This - * shouldn't affect rendering correctness, since the destination format is - * R32_FLOAT, so only the contents of the red channel matters. - */ - if (devinfo->ver == 6 && - src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1 && - src_mt->format == dst_mt->format && - (dst_format == MESA_FORMAT_L_FLOAT32 || - dst_format == MESA_FORMAT_I_FLOAT32)) { - src_format = dst_format = MESA_FORMAT_R_FLOAT32; - } - - enum blorp_filter blorp_filter; - if (fabsf(dst_x1 - dst_x0) == fabsf(src_x1 - src_x0) && - fabsf(dst_y1 - dst_y0) == fabsf(src_y1 - src_y0)) { - if (src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1) { - /* From the OpenGL ES 3.2 specification, section 16.2.1: - * - * "If the read framebuffer is multisampled (its effective value - * of SAMPLE_BUFFERS is one) and the draw framebuffer is not (its - * value of SAMPLE_BUFFERS is zero), the samples corresponding to - * each pixel location in the source are converted to a single - * sample before being written to the destination. The filter - * parameter is ignored. If the source formats are integer types - * or stencil values, a single sample’s value is selected for each - * pixel. If the source formats are floating-point or normalized - * types, the sample values for each pixel are resolved in an - * implementation-dependent manner. 
If the source formats are - * depth values, sample values are resolved in an implementation- - * dependent manner where the result will be between the minimum - * and maximum depth values in the pixel." - * - * For depth and stencil resolves, we choose to always use the value - * at sample 0. - */ - GLenum base_format = _mesa_get_format_base_format(src_mt->format); - if (base_format == GL_DEPTH_COMPONENT || - base_format == GL_STENCIL_INDEX || - base_format == GL_DEPTH_STENCIL || - _mesa_is_format_integer(src_mt->format)) { - /* The OpenGL ES 3.2 spec says: - * - * "If the source formats are integer types or stencil values, - * a single sample's value is selected for each pixel." - * - * Just take sample 0 in this case. - */ - blorp_filter = BLORP_FILTER_SAMPLE_0; - } else { - blorp_filter = BLORP_FILTER_AVERAGE; - } - } else { - /* From the OpenGL 4.6 specification, section 18.3.1: - * - * "If the source and destination dimensions are identical, no - * filtering is applied." - * - * Using BLORP_FILTER_NONE will also handle the upsample case by - * replicating the one value in the source to all values in the - * destination. - */ - blorp_filter = BLORP_FILTER_NONE; - } - } else if (gl_filter == GL_LINEAR || - gl_filter == GL_SCALED_RESOLVE_FASTEST_EXT || - gl_filter == GL_SCALED_RESOLVE_NICEST_EXT) { - blorp_filter = BLORP_FILTER_BILINEAR; - } else { - blorp_filter = BLORP_FILTER_NEAREST; - } - - enum isl_format src_isl_format = - brw_blorp_to_isl_format(brw, src_format, false); - enum isl_aux_usage src_aux_usage = - brw_miptree_texture_aux_usage(brw, src_mt, src_isl_format, - 0 /* The astc5x5 WA isn't needed */); - /* We do format workarounds for some depth formats so we can't reliably - * sample with HiZ. One of these days, we should fix that. - */ - if (src_aux_usage == ISL_AUX_USAGE_HIZ && src_mt->format != src_format) - src_aux_usage = ISL_AUX_USAGE_NONE; - const bool src_clear_supported = - src_aux_usage != ISL_AUX_USAGE_NONE && src_mt->format == src_format; - brw_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1, - src_aux_usage, src_clear_supported); - - enum isl_format dst_isl_format = - brw_blorp_to_isl_format(brw, dst_format, true); - enum isl_aux_usage dst_aux_usage = - brw_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, false, false); - const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE; - brw_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1, - dst_aux_usage, dst_clear_supported); - - struct blorp_surf src_surf, dst_surf; - blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false, - &src_level, src_layer, 1); - blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true, - &dst_level, dst_layer, 1); - - struct isl_swizzle src_isl_swizzle = { - .r = swizzle_to_scs(GET_SWZ(src_swizzle, 0)), - .g = swizzle_to_scs(GET_SWZ(src_swizzle, 1)), - .b = swizzle_to_scs(GET_SWZ(src_swizzle, 2)), - .a = swizzle_to_scs(GET_SWZ(src_swizzle, 3)), - }; - - struct blorp_batch batch; - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_blit(&batch, &src_surf, src_level, src_layer, - src_isl_format, src_isl_swizzle, - &dst_surf, dst_level, dst_layer, - dst_isl_format, ISL_SWIZZLE_IDENTITY, - src_x0, src_y0, src_x1, src_y1, - dst_x0, dst_y0, dst_x1, dst_y1, - blorp_filter, mirror_x, mirror_y); - blorp_batch_finish(&batch); - - brw_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1, - dst_aux_usage); -} - -void -brw_blorp_copy_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, 
unsigned src_layer,
-                        struct brw_mipmap_tree *dst_mt,
-                        unsigned dst_level, unsigned dst_layer,
-                        unsigned src_x, unsigned src_y,
-                        unsigned dst_x, unsigned dst_y,
-                        unsigned src_width, unsigned src_height)
-{
-   DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d "
-       "to %dx %s mt %p %d %d (%d,%d)\n",
-       __func__,
-       src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt,
-       src_level, src_layer, src_x, src_y, src_width, src_height,
-       dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt,
-       dst_level, dst_layer, dst_x, dst_y);
-
-   enum isl_aux_usage src_aux_usage, dst_aux_usage;
-   bool src_clear_supported, dst_clear_supported;
-
-   switch (src_mt->aux_usage) {
-   case ISL_AUX_USAGE_HIZ:
-      if (brw_miptree_sample_with_hiz(brw, src_mt)) {
-         src_aux_usage = src_mt->aux_usage;
-         src_clear_supported = true;
-      } else {
-         src_aux_usage = ISL_AUX_USAGE_NONE;
-         src_clear_supported = false;
-      }
-      break;
-   case ISL_AUX_USAGE_MCS:
-   case ISL_AUX_USAGE_CCS_E:
-      src_aux_usage = src_mt->aux_usage;
-      src_clear_supported = false;
-      break;
-   default:
-      src_aux_usage = ISL_AUX_USAGE_NONE;
-      src_clear_supported = false;
-      break;
-   }
-
-   switch (dst_mt->aux_usage) {
-   case ISL_AUX_USAGE_MCS:
-   case ISL_AUX_USAGE_CCS_E:
-      dst_aux_usage = dst_mt->aux_usage;
-      dst_clear_supported = false;
-      break;
-   default:
-      dst_aux_usage = ISL_AUX_USAGE_NONE;
-      dst_clear_supported = false;
-      break;
-   }
-
-   brw_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
-                              src_aux_usage, src_clear_supported);
-   brw_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
-                              dst_aux_usage, dst_clear_supported);
-
-   struct blorp_surf src_surf, dst_surf;
-   blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false,
-                          &src_level, src_layer, 1);
-   blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true,
-                          &dst_level, dst_layer, 1);
-
-   /* The hardware seems to have issues with having two different-format
-    * views of the same texture in the sampler cache at the same time.  It's
-    * unclear exactly what the issue is, but it hurts glCopyImageSubData
-    * particularly badly because it does a lot of format reinterprets.  We
-    * badly need a better understanding of the issue and a better fix, but
-    * this works for now and fixes CTS tests.
-    *
-    * TODO: Remove this hack!
- */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - - struct blorp_batch batch; - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_copy(&batch, &src_surf, src_level, src_layer, - &dst_surf, dst_level, dst_layer, - src_x, src_y, dst_x, dst_y, src_width, src_height); - blorp_batch_finish(&batch); - - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - - brw_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1, - dst_aux_usage); -} - -void -brw_blorp_copy_buffers(struct brw_context *brw, - struct brw_bo *src_bo, - unsigned src_offset, - struct brw_bo *dst_bo, - unsigned dst_offset, - unsigned size) -{ - DBG("%s %d bytes from %p[%d] to %p[%d]", - __func__, size, src_bo, src_offset, dst_bo, dst_offset); - - struct blorp_batch batch; - struct blorp_address src = { - .buffer = src_bo, .offset = src_offset, - .mocs = brw_mocs(&brw->isl_dev, src_bo), - }; - struct blorp_address dst = { - .buffer = dst_bo, .offset = dst_offset, - .mocs = brw_mocs(&brw->isl_dev, dst_bo), - }; - - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_buffer_copy(&batch, src, dst, size); - blorp_batch_finish(&batch); -} - - -static struct brw_mipmap_tree * -find_miptree(GLbitfield buffer_bit, struct brw_renderbuffer *irb) -{ - struct brw_mipmap_tree *mt = irb->mt; - if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt) - mt = mt->stencil_mt; - return mt; -} - -static int -blorp_get_texture_swizzle(const struct brw_renderbuffer *irb) -{ - return irb->Base.Base._BaseFormat == GL_RGB ? - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE) : - SWIZZLE_XYZW; -} - -static void -do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit, - struct brw_renderbuffer *src_irb, mesa_format src_format, - struct brw_renderbuffer *dst_irb, mesa_format dst_format, - GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, - GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, - GLenum filter, bool mirror_x, bool mirror_y) -{ - const struct gl_context *ctx = &brw->ctx; - - /* Find source/dst miptrees */ - struct brw_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb); - struct brw_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb); - - const bool do_srgb = ctx->Color.sRGBEnabled; - - /* Do the blit */ - brw_blorp_blit_miptrees(brw, - src_mt, src_irb->mt_level, src_irb->mt_layer, - src_format, blorp_get_texture_swizzle(src_irb), - dst_mt, dst_irb->mt_level, dst_irb->mt_layer, - dst_format, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y, - do_srgb, do_srgb); - - dst_irb->need_downsample = true; -} - -static bool -try_blorp_blit(struct brw_context *brw, - const struct gl_framebuffer *read_fb, - const struct gl_framebuffer *draw_fb, - GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, - GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, - GLenum filter, GLbitfield buffer_bit) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - /* Sync up the state of window system buffers. We need to do this before - * we go looking for the buffers. 
- */ - brw_prepare_render(brw); - - bool mirror_x, mirror_y; - if (brw_meta_mirror_clip_and_scissor(ctx, read_fb, draw_fb, - &srcX0, &srcY0, &srcX1, &srcY1, - &dstX0, &dstY0, &dstX1, &dstY1, - &mirror_x, &mirror_y)) - return true; - - /* Find buffers */ - struct brw_renderbuffer *src_irb; - struct brw_renderbuffer *dst_irb; - struct brw_mipmap_tree *src_mt; - struct brw_mipmap_tree *dst_mt; - switch (buffer_bit) { - case GL_COLOR_BUFFER_BIT: - src_irb = brw_renderbuffer(read_fb->_ColorReadBuffer); - for (unsigned i = 0; i < draw_fb->_NumColorDrawBuffers; ++i) { - dst_irb = brw_renderbuffer(draw_fb->_ColorDrawBuffers[i]); - if (dst_irb) - do_blorp_blit(brw, buffer_bit, - src_irb, src_irb->Base.Base.Format, - dst_irb, dst_irb->Base.Base.Format, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); - } - break; - case GL_DEPTH_BUFFER_BIT: - src_irb = - brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); - dst_irb = - brw_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer); - src_mt = find_miptree(buffer_bit, src_irb); - dst_mt = find_miptree(buffer_bit, dst_irb); - - /* We also can't handle any combined depth-stencil formats because we - * have to reinterpret as a color format. - */ - if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL || - _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL) - return false; - - do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE, - dst_irb, MESA_FORMAT_NONE, srcX0, srcY0, - srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); - break; - case GL_STENCIL_BUFFER_BIT: - /* Blorp doesn't support combined depth stencil which is all we have - * prior to gfx6. - */ - if (devinfo->ver < 6) - return false; - - src_irb = - brw_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer); - dst_irb = - brw_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer); - do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE, - dst_irb, MESA_FORMAT_NONE, srcX0, srcY0, - srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); - break; - default: - unreachable("not reached"); - } - - return true; -} - -static void -apply_y_flip(int *y0, int *y1, int height) -{ - int tmp = height - *y0; - *y0 = height - *y1; - *y1 = tmp; -} - -bool -brw_blorp_copytexsubimage(struct brw_context *brw, - struct gl_renderbuffer *src_rb, - struct gl_texture_image *dst_image, - int slice, - int srcX0, int srcY0, - int dstX0, int dstY0, - int width, int height) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb); - struct brw_texture_image *intel_image = brw_texture_image(dst_image); - - /* No pixel transfer operations (zoom, bias, mapping), just a blit */ - if (brw->ctx._ImageTransferState) - return false; - - /* Sync up the state of window system buffers. We need to do this before - * we go looking at the src renderbuffer's miptree. - */ - brw_prepare_render(brw); - - struct brw_mipmap_tree *src_mt = src_irb->mt; - struct brw_mipmap_tree *dst_mt = intel_image->mt; - - /* We can't handle any combined depth-stencil formats because we have to - * reinterpret as a color format. 
- */ - if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL || - _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL) - return false; - - if (!brw_blorp_supports_dst_format(brw, dst_image->TexFormat)) - return false; - - /* Source clipping shouldn't be necessary, since copytexsubimage (in - * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which - * takes care of it. - * - * Destination clipping shouldn't be necessary since the restrictions on - * glCopyTexSubImage prevent the user from specifying a destination rectangle - * that falls outside the bounds of the destination texture. - * See error_check_subtexture_dimensions(). - */ - - int srcY1 = srcY0 + height; - int srcX1 = srcX0 + width; - int dstX1 = dstX0 + width; - int dstY1 = dstY0 + height; - - /* Account for the fact that in the system framebuffer, the origin is at - * the lower left. - */ - bool mirror_y = ctx->ReadBuffer->FlipY; - if (mirror_y) - apply_y_flip(&srcY0, &srcY1, src_rb->Height); - - /* Account for face selection and texture view MinLayer */ - int dst_slice = slice + dst_image->TexObject->Attrib.MinLayer + dst_image->Face; - int dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel; - - brw_blorp_blit_miptrees(brw, - src_mt, src_irb->mt_level, src_irb->mt_layer, - src_rb->Format, blorp_get_texture_swizzle(src_irb), - dst_mt, dst_level, dst_slice, - dst_image->TexFormat, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - GL_NEAREST, false, mirror_y, - false, false); - - /* If we're copying to a packed depth stencil texture and the source - * framebuffer has separate stencil, we need to also copy the stencil data - * over. - */ - src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; - if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 && - src_rb != NULL) { - src_irb = brw_renderbuffer(src_rb); - src_mt = src_irb->mt; - - if (src_mt->stencil_mt) - src_mt = src_mt->stencil_mt; - if (dst_mt->stencil_mt) - dst_mt = dst_mt->stencil_mt; - - if (src_mt != dst_mt) { - brw_blorp_blit_miptrees(brw, - src_mt, src_irb->mt_level, src_irb->mt_layer, - src_mt->format, - blorp_get_texture_swizzle(src_irb), - dst_mt, dst_level, dst_slice, - dst_mt->format, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - GL_NEAREST, false, mirror_y, - false, false); - } - } - - return true; -} - - -GLbitfield -brw_blorp_framebuffer(struct brw_context *brw, - struct gl_framebuffer *readFb, - struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - static GLbitfield buffer_bits[] = { - GL_COLOR_BUFFER_BIT, - GL_DEPTH_BUFFER_BIT, - GL_STENCIL_BUFFER_BIT, - }; - - for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) { - if ((mask & buffer_bits[i]) && - try_blorp_blit(brw, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - filter, buffer_bits[i])) { - mask &= ~buffer_bits[i]; - } - } - - /* try_blorp_blit should always be successful for color blits. 
*/ - assert(!(mask & GL_COLOR_BUFFER_BIT)); - return mask; -} - -static struct brw_bo * -blorp_get_client_bo(struct brw_context *brw, - unsigned w, unsigned h, unsigned d, - GLenum target, GLenum format, GLenum type, - const void *pixels, - const struct gl_pixelstore_attrib *packing, - uint32_t *offset_out, uint32_t *row_stride_out, - uint32_t *image_stride_out, bool read_only) -{ - /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */ - const GLuint dims = _mesa_get_texture_dimensions(target); - const uint32_t first_pixel = _mesa_image_offset(dims, packing, w, h, - format, type, 0, 0, 0); - const uint32_t last_pixel = _mesa_image_offset(dims, packing, w, h, - format, type, - d - 1, h - 1, w); - const uint32_t stride = _mesa_image_row_stride(packing, w, format, type); - const uint32_t size = last_pixel - first_pixel; - - *row_stride_out = stride; - *image_stride_out = _mesa_image_image_stride(packing, w, h, format, type); - - if (packing->BufferObj) { - const uint32_t offset = first_pixel + (intptr_t)pixels; - - if (!read_only) { - const int32_t cpp = _mesa_bytes_per_pixel(format, type); - assert(cpp > 0); - - if ((offset % cpp) || (stride % cpp)) { - perf_debug("Bad PBO alignment; fallback to CPU mapping\n"); - return NULL; - } - } - - /* This is a user-provided PBO. We just need to get the BO out */ - struct brw_buffer_object *intel_pbo = - brw_buffer_object(packing->BufferObj); - struct brw_bo *bo = - brw_bufferobj_buffer(brw, intel_pbo, offset, size, !read_only); - - /* We take a reference to the BO so that the caller can just always - * unref without having to worry about whether it's a user PBO or one - * we created. - */ - brw_bo_reference(bo); - - *offset_out = offset; - return bo; - } else { - /* Someone should have already checked that there is data to upload. */ - assert(pixels); - - /* Creating a temp buffer currently only works for upload */ - assert(read_only); - - /* This is not a user-provided PBO. Instead, pixels is a pointer to CPU - * data which we need to copy into a BO. - */ - struct brw_bo *bo = - brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size, - BRW_MEMZONE_OTHER); - if (bo == NULL) { - perf_debug("%s: temp bo creation failed: size = %u\n", __func__, - size); - return NULL; - } - - if (brw_bo_subdata(bo, 0, size, pixels + first_pixel)) { - perf_debug("%s: temp bo upload failed\n", __func__); - brw_bo_unreference(bo); - return NULL; - } - - *offset_out = 0; - return bo; - } -} - -/* Consider all the restrictions and determine the format of the source. 
*/ -static mesa_format -blorp_get_client_format(struct brw_context *brw, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *packing) -{ - if (brw->ctx._ImageTransferState) - return MESA_FORMAT_NONE; - - if (packing->SwapBytes || packing->LsbFirst || packing->Invert) { - perf_debug("%s: unsupported gl_pixelstore_attrib\n", __func__); - return MESA_FORMAT_NONE; - } - - if (format != GL_RED && - format != GL_RG && - format != GL_RGB && - format != GL_BGR && - format != GL_RGBA && - format != GL_BGRA && - format != GL_ALPHA && - format != GL_RED_INTEGER && - format != GL_RG_INTEGER && - format != GL_RGB_INTEGER && - format != GL_BGR_INTEGER && - format != GL_RGBA_INTEGER && - format != GL_BGRA_INTEGER) { - perf_debug("%s: %s not supported", __func__, - _mesa_enum_to_string(format)); - return MESA_FORMAT_NONE; - } - - return _mesa_tex_format_from_format_and_type(&brw->ctx, format, type); -} - -bool -brw_blorp_upload_miptree(struct brw_context *brw, - struct brw_mipmap_tree *dst_mt, - mesa_format dst_format, - uint32_t level, uint32_t x, uint32_t y, uint32_t z, - uint32_t width, uint32_t height, uint32_t depth, - GLenum target, GLenum format, GLenum type, - const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - const mesa_format src_format = - blorp_get_client_format(brw, format, type, packing); - if (src_format == MESA_FORMAT_NONE) - return false; - - if (!brw->mesa_format_supports_render[dst_format]) { - perf_debug("%s: can't use %s as render target\n", __func__, - _mesa_get_format_name(dst_format)); - return false; - } - - uint32_t src_offset, src_row_stride, src_image_stride; - struct brw_bo *src_bo = - blorp_get_client_bo(brw, width, height, depth, - target, format, type, pixels, packing, - &src_offset, &src_row_stride, - &src_image_stride, true); - if (src_bo == NULL) - return false; - - /* Now that source is offset to correct starting point, adjust the - * given dimensions to treat 1D arrays as 2D. - */ - if (target == GL_TEXTURE_1D_ARRAY) { - assert(depth == 1); - assert(z == 0); - depth = height; - height = 1; - z = y; - y = 0; - src_image_stride = src_row_stride; - } - - brw_miptree_check_level_layer(dst_mt, level, z + depth - 1); - - bool result = false; - - /* Blit slice-by-slice creating a single-slice miptree for each layer. Even - * in case of linear buffers hardware wants image arrays to be aligned by - * four rows. This way hardware only gets one image at a time and any - * source alignment will do. - */ - for (unsigned i = 0; i < depth; ++i) { - struct brw_mipmap_tree *src_mt = - brw_miptree_create_for_bo(brw, src_bo, src_format, - src_offset + i * src_image_stride, - width, height, 1, - src_row_stride, - ISL_TILING_LINEAR, 0); - - if (!src_mt) { - perf_debug("%s: miptree creation for src failed\n", __func__); - goto err; - } - - /* In case exact match is needed, copy using equivalent UINT formats - * preventing hardware from changing presentation for SNORM -1. 
-       */
-      if (src_mt->format == dst_format) {
-         brw_blorp_copy_miptrees(brw, src_mt, 0, 0,
-                                 dst_mt, level, z + i,
-                                 0, 0, x, y, width, height);
-      } else {
-         brw_blorp_blit_miptrees(brw, src_mt, 0, 0,
-                                 src_format, SWIZZLE_XYZW,
-                                 dst_mt, level, z + i,
-                                 dst_format,
-                                 0, 0, width, height,
-                                 x, y, x + width, y + height,
-                                 GL_NEAREST, false, false, false, false);
-      }
-
-      brw_miptree_release(&src_mt);
-   }
-
-   result = true;
-
-err:
-   brw_bo_unreference(src_bo);
-
-   return result;
-}
-
-bool
-brw_blorp_download_miptree(struct brw_context *brw,
-                           struct brw_mipmap_tree *src_mt,
-                           mesa_format src_format, uint32_t src_swizzle,
-                           uint32_t level, uint32_t x, uint32_t y, uint32_t z,
-                           uint32_t width, uint32_t height, uint32_t depth,
-                           GLenum target, GLenum format, GLenum type,
-                           bool y_flip, const void *pixels,
-                           const struct gl_pixelstore_attrib *packing)
-{
-   const mesa_format dst_format =
-      blorp_get_client_format(brw, format, type, packing);
-   if (dst_format == MESA_FORMAT_NONE)
-      return false;
-
-   if (!brw->mesa_format_supports_render[dst_format]) {
-      perf_debug("%s: can't use %s as render target\n", __func__,
-                 _mesa_get_format_name(dst_format));
-      return false;
-   }
-
-   /* We can't fetch from LUMINANCE or INTENSITY, as that would require a
-    * non-trivial swizzle.
-    */
-   switch (_mesa_get_format_base_format(src_format)) {
-   case GL_LUMINANCE:
-   case GL_LUMINANCE_ALPHA:
-   case GL_INTENSITY:
-      return false;
-   default:
-      break;
-   }
-
-   /* This pass only works for PBOs */
-   assert(packing->BufferObj);
-
-   uint32_t dst_offset, dst_row_stride, dst_image_stride;
-   struct brw_bo *dst_bo =
-      blorp_get_client_bo(brw, width, height, depth,
-                          target, format, type, pixels, packing,
-                          &dst_offset, &dst_row_stride,
-                          &dst_image_stride, false);
-   if (dst_bo == NULL)
-      return false;
-
-   /* Now that source is offset to correct starting point, adjust the
-    * given dimensions to treat 1D arrays as 2D.
-    */
-   if (target == GL_TEXTURE_1D_ARRAY) {
-      assert(depth == 1);
-      assert(z == 0);
-      depth = height;
-      height = 1;
-      z = y;
-      y = 0;
-      dst_image_stride = dst_row_stride;
-   }
-
-   brw_miptree_check_level_layer(src_mt, level, z + depth - 1);
-
-   int y0 = y;
-   int y1 = y + height;
-   if (y_flip) {
-      apply_y_flip(&y0, &y1, minify(src_mt->surf.phys_level0_sa.height,
-                                    level - src_mt->first_level));
-   }
-
-   bool result = false;
-
-   /* Blit slice-by-slice creating a single-slice miptree for each layer. Even
-    * in case of linear buffers hardware wants image arrays to be aligned by
-    * four rows. This way hardware only gets one image at a time and any
-    * source alignment will do.
-    */
-   for (unsigned i = 0; i < depth; ++i) {
-      struct brw_mipmap_tree *dst_mt =
-         brw_miptree_create_for_bo(brw, dst_bo, dst_format,
-                                   dst_offset + i * dst_image_stride,
-                                   width, height, 1,
-                                   dst_row_stride,
-                                   ISL_TILING_LINEAR, 0);
-
-      if (!dst_mt) {
-         perf_debug("%s: miptree creation for dst failed\n", __func__);
-         goto err;
-      }
-
-      /* In case exact match is needed, copy using equivalent UINT formats
-       * preventing hardware from changing presentation for SNORM -1.
-       */
-      if (dst_mt->format == src_format && !y_flip &&
-          src_swizzle == SWIZZLE_XYZW) {
-         brw_blorp_copy_miptrees(brw, src_mt, level, z + i,
-                                 dst_mt, 0, 0,
-                                 x, y, 0, 0, width, height);
-      } else {
-         brw_blorp_blit_miptrees(brw, src_mt, level, z + i,
-                                 src_format, src_swizzle,
-                                 dst_mt, 0, 0, dst_format,
-                                 x, y0, x + width, y1,
-                                 0, 0, width, height,
-                                 GL_NEAREST, false, y_flip, false, false);
-      }
-
-      brw_miptree_release(&dst_mt);
-   }
-
-   result = true;
-
-   /* We implement PBO transfers by binding the user-provided BO as a fake
-    * framebuffer and rendering to it.  This breaks the invariant of the
-    * GL that nothing is able to render to a BO, causing nondeterministic
-    * corruption issues because the render cache is not coherent with a
-    * number of other caches that the BO could potentially be bound to
-    * afterwards.
-    *
-    * This could be solved in the same way that we guarantee texture
-    * coherency after a texture is attached to a framebuffer and
-    * rendered to, but that would involve checking *all* BOs bound to
-    * the pipeline for the case we need to emit a cache flush due to
-    * previous rendering to any of them -- including vertex, index,
-    * uniform, atomic counter, shader image, transform feedback,
-    * indirect draw buffers, etc.
-    *
-    * That would increase the per-draw call overhead even though it's
-    * very unlikely that any of the BOs bound to the pipeline has been
-    * rendered to via a PBO at any point, so it seems better to just
-    * flush here unconditionally.
-    */
-   brw_emit_mi_flush(brw);
-
-err:
-   brw_bo_unreference(dst_bo);
-
-   return result;
-}
-
-static bool
-set_write_disables(const struct brw_renderbuffer *irb,
-                   const unsigned color_mask, uint8_t *color_write_disable)
-{
-   /* Format information in the renderbuffer represents the requirements
-    * given by the client.  There are cases where the backing miptree uses,
-    * for example, RGBA to represent RGBX.  Since the client is only expecting
-    * RGB we can treat alpha as not used and write whatever we like into it.
-    */
-   const GLenum base_format = irb->Base.Base._BaseFormat;
-   const int components = _mesa_components_in_format(base_format);
-   assert(components > 0);
-   *color_write_disable = ~color_mask & BITFIELD_MASK(components);
-   return *color_write_disable;
-}
-
-static void
-do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
-                      struct gl_renderbuffer *rb, unsigned buf,
-                      bool partial_clear, bool encode_srgb)
-{
-   struct gl_context *ctx = &brw->ctx;
-   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-   uint32_t x0, x1, y0, y1;
-
-   mesa_format format = irb->Base.Base.Format;
-   if (!encode_srgb)
-      format = _mesa_get_srgb_format_linear(format);
-   enum isl_format isl_format = brw->mesa_to_isl_render_format[format];
-
-   x0 = fb->_Xmin;
-   x1 = fb->_Xmax;
-   if (fb->FlipY) {
-      y0 = rb->Height - fb->_Ymax;
-      y1 = rb->Height - fb->_Ymin;
-   } else {
-      y0 = fb->_Ymin;
-      y1 = fb->_Ymax;
-   }
-
-   /* If the clear region is empty, just return. */
-   if (x0 == x1 || y0 == y1)
-      return;
-
-   bool can_fast_clear = !partial_clear;
-
-   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
-      can_fast_clear = false;
-
-   uint8_t color_write_disable = 0;
-   if (set_write_disables(irb, GET_COLORMASK(ctx->Color.ColorMask, buf),
-                          &color_write_disable))
-      can_fast_clear = false;
-
-   /* We store clear colors as floats or uints as needed.  If there are
-    * texture views in play, the formats will not properly be respected
-    * during resolves because the resolve operations only know about the
-    * miptree and not the renderbuffer.
-    */
-   if (irb->Base.Base.Format != irb->mt->format)
-      can_fast_clear = false;
-
-   if (!irb->mt->supports_fast_clear ||
-       !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor))
-      can_fast_clear = false;
-
-   /* Surface state can only record one fast clear color value. Therefore
-    * unless different levels/layers agree on the color it can be used to
-    * represent only a single level/layer. Here it will be reserved for the
-    * first slice (level 0, layer 0).
-    */
-   if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer)
-      can_fast_clear = false;
-
-   unsigned level = irb->mt_level;
-   const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
-   /* If the MCS buffer hasn't been allocated yet, we need to allocate it now.
-    */
-   if (can_fast_clear && !irb->mt->aux_buf) {
-      assert(irb->mt->aux_usage == ISL_AUX_USAGE_CCS_D);
-      if (!brw_miptree_alloc_aux(brw, irb->mt)) {
-         /* We're out of memory. Fall back to a non-fast clear. */
-         can_fast_clear = false;
-      }
-   }
-
-   if (can_fast_clear) {
-      const enum isl_aux_state aux_state =
-         brw_miptree_get_aux_state(irb->mt, irb->mt_level, irb->mt_layer);
-      union isl_color_value clear_color =
-         brw_meta_convert_fast_clear_color(brw, irb->mt,
-                                           &ctx->Color.ClearColor);
-
-      /* If the buffer is already in ISL_AUX_STATE_CLEAR and the clear color
-       * hasn't changed, the clear is redundant and can be skipped.
-       */
-      if (!brw_miptree_set_clear_color(brw, irb->mt, clear_color) &&
-          aux_state == ISL_AUX_STATE_CLEAR) {
-         return;
-      }
-
-      DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
-          irb->mt, irb->mt_level, irb->mt_layer, num_layers);
-
-      /* We can't set up the blorp_surf until we've allocated the MCS above */
-      struct blorp_surf surf;
-      blorp_surf_for_miptree(brw, &surf, irb->mt, irb->mt->aux_usage, true,
-                             &level, irb->mt_layer, num_layers);
-
-      /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-       *
-       *    "Any transition from any value in {Clear, Render, Resolve} to a
-       *    different value in {Clear, Render, Resolve} requires end of pipe
-       *    synchronization."
-       *
-       * In other words, fast clear ops are not properly synchronized with
-       * other drawing.  We need to use a PIPE_CONTROL to ensure that the
-       * contents of the previous draw hit the render target before we resolve
-       * and again afterwards to ensure that the resolve is complete before we
-       * do any more regular drawing.
-       */
-      brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-      struct blorp_batch batch;
-      blorp_batch_init(&brw->blorp, &batch, brw, 0);
-      blorp_fast_clear(&batch, &surf, isl_format_srgb_to_linear(isl_format),
-                       ISL_SWIZZLE_IDENTITY,
-                       level, irb->mt_layer, num_layers, x0, y0, x1, y1);
-      blorp_batch_finish(&batch);
-
-      brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-      /* Now that the fast clear has occurred, put the buffer in
-       * ISL_AUX_STATE_CLEAR so that we won't waste time doing
-       * redundant clears.
- */ - brw_miptree_set_aux_state(brw, irb->mt, irb->mt_level, - irb->mt_layer, num_layers, - ISL_AUX_STATE_CLEAR); - } else { - DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__, - irb->mt, irb->mt_level, irb->mt_layer, num_layers); - - enum isl_aux_usage aux_usage = - brw_miptree_render_aux_usage(brw, irb->mt, isl_format, false, false); - brw_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer, - num_layers, aux_usage); - - struct blorp_surf surf; - blorp_surf_for_miptree(brw, &surf, irb->mt, aux_usage, true, - &level, irb->mt_layer, num_layers); - - union isl_color_value clear_color; - memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4); - - struct blorp_batch batch; - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - level, irb->mt_layer, num_layers, - x0, y0, x1, y1, - clear_color, color_write_disable); - blorp_batch_finish(&batch); - - brw_miptree_finish_render(brw, irb->mt, level, irb->mt_layer, - num_layers, aux_usage); - } - - return; -} - -void -brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear, bool encode_srgb) -{ - for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - /* Only clear the buffers present in the provided mask */ - if (((1 << fb->_ColorDrawBufferIndexes[buf]) & mask) == 0) - continue; - - /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported, - * the framebuffer can be complete with some attachments missing. In - * this case the _ColorDrawBuffers pointer will be NULL. - */ - if (rb == NULL) - continue; - - do_single_blorp_clear(brw, fb, rb, buf, partial_clear, encode_srgb); - irb->need_downsample = true; - } - - return; -} - -void -brw_blorp_clear_depth_stencil(struct brw_context *brw, - struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear) -{ - const struct gl_context *ctx = &brw->ctx; - struct gl_renderbuffer *depth_rb = - fb->Attachment[BUFFER_DEPTH].Renderbuffer; - struct gl_renderbuffer *stencil_rb = - fb->Attachment[BUFFER_STENCIL].Renderbuffer; - - if (!depth_rb || ctx->Depth.Mask == GL_FALSE) - mask &= ~BUFFER_BIT_DEPTH; - - if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0) - mask &= ~BUFFER_BIT_STENCIL; - - if (!(mask & (BUFFER_BITS_DEPTH_STENCIL))) - return; - - uint32_t x0, x1, y0, y1, rb_height; - if (depth_rb) { - rb_height = depth_rb->Height; - if (stencil_rb) { - assert(depth_rb->Width == stencil_rb->Width); - assert(depth_rb->Height == stencil_rb->Height); - } - } else { - assert(stencil_rb); - rb_height = stencil_rb->Height; - } - - x0 = fb->_Xmin; - x1 = fb->_Xmax; - if (fb->FlipY) { - y0 = rb_height - fb->_Ymax; - y1 = rb_height - fb->_Ymin; - } else { - y0 = fb->_Ymin; - y1 = fb->_Ymax; - } - - /* If the clear region is empty, just return. */ - if (x0 == x1 || y0 == y1) - return; - - uint32_t level = 0, start_layer = 0, num_layers; - struct blorp_surf depth_surf, stencil_surf; - - struct brw_mipmap_tree *depth_mt = NULL; - if (mask & BUFFER_BIT_DEPTH) { - struct brw_renderbuffer *irb = brw_renderbuffer(depth_rb); - depth_mt = find_miptree(GL_DEPTH_BUFFER_BIT, irb); - - level = irb->mt_level; - start_layer = irb->mt_layer; - num_layers = fb->MaxNumLayers ? 
irb->layer_count : 1;
-
-      brw_miptree_prepare_depth(brw, depth_mt, level, start_layer, num_layers);
-
-      unsigned depth_level = level;
-      blorp_surf_for_miptree(brw, &depth_surf, depth_mt, depth_mt->aux_usage,
-                             true, &depth_level, start_layer, num_layers);
-      assert(depth_level == level);
-   }
-
-   uint8_t stencil_mask = 0;
-   struct brw_mipmap_tree *stencil_mt = NULL;
-   if (mask & BUFFER_BIT_STENCIL) {
-      struct brw_renderbuffer *irb = brw_renderbuffer(stencil_rb);
-      stencil_mt = find_miptree(GL_STENCIL_BUFFER_BIT, irb);
-
-      if (mask & BUFFER_BIT_DEPTH) {
-         assert(level == irb->mt_level);
-         assert(start_layer == irb->mt_layer);
-         assert(num_layers == (fb->MaxNumLayers ? irb->layer_count : 1));
-      }
-
-      level = irb->mt_level;
-      start_layer = irb->mt_layer;
-      num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
-      stencil_mask = ctx->Stencil.WriteMask[0] & 0xff;
-
-      brw_miptree_prepare_access(brw, stencil_mt, level, 1,
-                                 start_layer, num_layers,
-                                 ISL_AUX_USAGE_NONE, false);
-
-      unsigned stencil_level = level;
-      blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt,
-                             ISL_AUX_USAGE_NONE, true,
-                             &stencil_level, start_layer, num_layers);
-   }
-
-   assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf,
-                             level, start_layer, num_layers,
-                             x0, y0, x1, y1,
-                             (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear,
-                             stencil_mask, ctx->Stencil.Clear);
-   blorp_batch_finish(&batch);
-
-   if (mask & BUFFER_BIT_DEPTH) {
-      brw_miptree_finish_depth(brw, depth_mt, level,
-                               start_layer, num_layers, true);
-   }
-
-   if (stencil_mask) {
-      brw_miptree_finish_write(brw, stencil_mt, level,
-                               start_layer, num_layers,
-                               ISL_AUX_USAGE_NONE);
-   }
-}
-
-void
-brw_blorp_resolve_color(struct brw_context *brw, struct brw_mipmap_tree *mt,
-                        unsigned level, unsigned layer,
-                        enum isl_aux_op resolve_op)
-{
-   DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);
-
-   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
-
-   struct blorp_surf surf;
-   blorp_surf_for_miptree(brw, &surf, mt, mt->aux_usage, true,
-                          &level, layer, 1 /* num_layers */);
-
-   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-    *
-    *    "Any transition from any value in {Clear, Render, Resolve} to a
-    *    different value in {Clear, Render, Resolve} requires end of pipe
-    *    synchronization."
-    *
-    * In other words, fast clear ops are not properly synchronized with
-    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
-    * contents of the previous draw hit the render target before we resolve
-    * and again afterwards to ensure that the resolve is complete before we
-    * do any more regular drawing.
-    */
-   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_ccs_resolve(&batch, &surf, level, layer, 1,
-                     brw_blorp_to_isl_format(brw, format, true),
-                     resolve_op);
-   blorp_batch_finish(&batch);
-
-   /* See comment above */
-   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-}
-
-void
-brw_blorp_mcs_partial_resolve(struct brw_context *brw,
-                              struct brw_mipmap_tree *mt,
-                              uint32_t start_layer, uint32_t num_layers)
-{
-   DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
-       start_layer, start_layer + num_layers - 1);
-
-   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
-
-   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
-   enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);
-
-   struct blorp_surf surf;
-   uint32_t level = 0;
-   blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_MCS, true,
-                          &level, start_layer, num_layers);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_mcs_partial_resolve(&batch, &surf, isl_format,
-                             start_layer, num_layers);
-   blorp_batch_finish(&batch);
-}
-
-/**
- * Perform a HiZ or depth resolve operation.
- *
- * For an overview of HiZ ops, see the following sections of the Sandy Bridge
- * PRM, Volume 1, Part 2:
- *   - 7.5.3.1 Depth Buffer Clear
- *   - 7.5.3.2 Depth Buffer Resolve
- *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
- */
-void
-brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
-             unsigned int level, unsigned int start_layer,
-             unsigned int num_layers, enum isl_aux_op op)
-{
-   assert(brw_miptree_level_has_hiz(mt, level));
-   assert(op != ISL_AUX_OP_NONE);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   const char *opname = NULL;
-
-   switch (op) {
-   case ISL_AUX_OP_FULL_RESOLVE:
-      opname = "depth resolve";
-      break;
-   case ISL_AUX_OP_AMBIGUATE:
-      opname = "hiz ambiguate";
-      break;
-   case ISL_AUX_OP_FAST_CLEAR:
-      opname = "depth clear";
-      break;
-   case ISL_AUX_OP_PARTIAL_RESOLVE:
-   case ISL_AUX_OP_NONE:
-      unreachable("Invalid HiZ op");
-   }
-
-   DBG("%s %s to mt %p level %d layers %d-%d\n",
-       __func__, opname, mt, level, start_layer, start_layer + num_layers - 1);
-
-   /* The following stalls and flushes are only documented to be required for
-    * HiZ clear operations.  However, they also seem to be required for
-    * resolve operations.
-    */
-   if (devinfo->ver == 6) {
-      /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
-       *
-       *   "If other rendering operations have preceded this clear, a
-       *    PIPE_CONTROL with write cache flush enabled and Z-inhibit
-       *    disabled must be issued before the rectangle primitive used for
-       *    the depth buffer clear operation."
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_CS_STALL);
-   } else if (devinfo->ver >= 7) {
-      /*
-       * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
-       *
-       *   If other rendering operations have preceded this clear, a
-       *   PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
-       *   enabled must be issued before the rectangle primitive used for
-       *   the depth buffer clear operation.
-       *
-       * Same applies for Gfx8 and Gfx9.
-       *
-       * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
-       * PIPE_CONTROL, Depth Cache Flush Enable:
-       *
-       *   This bit must not be set when Depth Stall Enable bit is set in
-       *   this packet.
-       *
-       * This is confirmed to hold in practice; violating it on HSW causes
-       * immediate GPU hangs.
-       *
-       * Therefore issue two pipe control flushes: one for the cache flush and
-       * another for the depth stall.
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_CS_STALL);
-
-      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-   }
-
-   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ && mt->aux_buf);
-
-   struct blorp_surf surf;
-   blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_HIZ, true,
-                          &level, start_layer, num_layers);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw,
-                    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR);
-   blorp_hiz_op(&batch, &surf, level, start_layer, num_layers, op);
-   blorp_batch_finish(&batch);
-
-   /* The following stalls and flushes are only documented to be required for
-    * HiZ clear operations.  However, they also seem to be required for
-    * resolve operations.
-    */
-   if (devinfo->ver == 6) {
-      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
-       *
-       *   "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
-       *    followed by a PIPE_CONTROL command with DEPTH_STALL bit set
-       *    and Then followed by Depth FLUSH'
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_STALL);
-
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_CS_STALL);
-   } else if (devinfo->ver >= 8) {
-      /*
-       * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
-       *
-       *   "Depth buffer clear pass using any of the methods (WM_STATE,
-       *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
-       *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
-       *    "set" before starting to render.  DepthStall and DepthFlush are
-       *    not needed between consecutive depth clear passes nor is it
-       *    required if the depth clear pass was done with
-       *    'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
-       *
-       * TODO: As the spec says, this could be conditional.
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_DEPTH_STALL);
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
deleted file mode 100644
index 35822ab..0000000
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */ - -#ifndef BRW_BLORP_H -#define BRW_BLORP_H - -#include "blorp/blorp.h" -#include "brw_mipmap_tree.h" -#include "program/prog_instruction.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void brw_blorp_init(struct brw_context *brw); - -void -brw_blorp_blit_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - mesa_format src_format, int src_swizzle, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer, - mesa_format dst_format, - float src_x0, float src_y0, - float src_x1, float src_y1, - float dst_x0, float dst_y0, - float dst_x1, float dst_y1, - GLenum filter, bool mirror_x, bool mirror_y, - bool decode_srgb, bool encode_srgb); - -void -brw_blorp_copy_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_logical_layer, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_logical_layer, - unsigned src_x, unsigned src_y, - unsigned dst_x, unsigned dst_y, - unsigned src_width, unsigned src_height); - -void -brw_blorp_copy_buffers(struct brw_context *brw, - struct brw_bo *src_bo, - unsigned src_offset, - struct brw_bo *dst_bo, - unsigned dst_offset, - unsigned size); - -bool -brw_blorp_upload_miptree(struct brw_context *brw, - struct brw_mipmap_tree *dst_mt, - mesa_format dst_format, - uint32_t level, uint32_t x, uint32_t y, uint32_t z, - uint32_t width, uint32_t height, uint32_t depth, - GLenum target, GLenum format, GLenum type, - const void *pixels, - const struct gl_pixelstore_attrib *packing); - -bool -brw_blorp_download_miptree(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - mesa_format src_format, uint32_t src_swizzle, - uint32_t level, uint32_t x, uint32_t y, uint32_t z, - uint32_t width, uint32_t height, uint32_t depth, - GLenum target, GLenum format, GLenum type, - bool y_flip, const void *pixels, - const struct gl_pixelstore_attrib *packing); - -void -brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear, bool encode_srgb); -void -brw_blorp_clear_depth_stencil(struct brw_context *brw, - struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear); - -void -brw_blorp_resolve_color(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned level, unsigned layer, - enum isl_aux_op resolve_op); - -void -brw_blorp_mcs_partial_resolve(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t start_layer, uint32_t num_layers); - -void -brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt, - unsigned int level, unsigned int start_layer, - unsigned int num_layers, enum isl_aux_op op); - -void gfx4_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx45_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx5_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx6_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx7_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx75_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx8_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx9_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx11_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* BRW_BLORP_H */ diff 
--git a/src/mesa/drivers/dri/i965/brw_buffer_objects.c b/src/mesa/drivers/dri/i965/brw_buffer_objects.c deleted file mode 100644 index 929ff22..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffer_objects.c +++ /dev/null @@ -1,710 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @file brw_buffer_objects.c - * - * This provides core GL buffer object functionality. - */ - -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/streaming-load-memcpy.h" -#include "main/bufferobj.h" -#include "x86/common_x86_asm.h" -#include "util/u_memory.h" - -#include "brw_context.h" -#include "brw_blorp.h" -#include "brw_buffer_objects.h" -#include "brw_batch.h" - -static void -mark_buffer_gpu_usage(struct brw_buffer_object *intel_obj, - uint32_t offset, uint32_t size) -{ - intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset); - intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size); -} - -static void -mark_buffer_inactive(struct brw_buffer_object *intel_obj) -{ - intel_obj->gpu_active_start = ~0; - intel_obj->gpu_active_end = 0; -} - -static void -mark_buffer_valid_data(struct brw_buffer_object *intel_obj, - uint32_t offset, uint32_t size) -{ - intel_obj->valid_data_start = MIN2(intel_obj->valid_data_start, offset); - intel_obj->valid_data_end = MAX2(intel_obj->valid_data_end, offset + size); -} - -static void -mark_buffer_invalid(struct brw_buffer_object *intel_obj) -{ - intel_obj->valid_data_start = ~0; - intel_obj->valid_data_end = 0; -} - -/** Allocates a new brw_bo to store the data for the buffer object. */ -static void -alloc_buffer_object(struct brw_context *brw, - struct brw_buffer_object *intel_obj) -{ - const struct gl_context *ctx = &brw->ctx; - - uint64_t size = intel_obj->Base.Size; - if (ctx->Const.RobustAccess) { - /* Pad out buffer objects with an extra 2kB (half a page). - * - * When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_* - * reading out of bounds memory. The application might bind a UBO that's - * smaller than what the program expects. Ideally, we'd bind an extra - * push buffer containing zeros, but we have a limited number of those, - * so it's not always viable. Our only safe option is to pad all buffer - * objects by the maximum push data length, so that it will never read - * past the end of a BO. 
- * - * This is unfortunate, but it should result in at most 1 extra page, - * which probably isn't too terrible. - */ - size += 64 * 32; /* max read length of 64 256-bit units */ - } - intel_obj->buffer = - brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER); - - /* the buffer might be bound as a uniform buffer, need to update it - */ - if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - - mark_buffer_inactive(intel_obj); - mark_buffer_invalid(intel_obj); -} - -static void -release_buffer(struct brw_buffer_object *intel_obj) -{ - brw_bo_unreference(intel_obj->buffer); - intel_obj->buffer = NULL; -} - -/** - * The NewBufferObject() driver hook. - * - * Allocates a new brw_buffer_object structure and initializes it. - * - * There is some duplication between mesa's bufferobjects and our - * bufmgr buffers. Both have an integer handle and a hashtable to - * lookup an opaque structure. It would be nice if the handles and - * internal structure where somehow shared. - */ -static struct gl_buffer_object * -brw_new_buffer_object(struct gl_context * ctx, GLuint name) -{ - struct brw_buffer_object *obj = CALLOC_STRUCT(brw_buffer_object); - if (!obj) { - _mesa_error_no_memory(__func__); - return NULL; - } - - _mesa_initialize_buffer_object(ctx, &obj->Base, name); - - obj->buffer = NULL; - - return &obj->Base; -} - -/** - * The DeleteBuffer() driver hook. - * - * Deletes a single OpenGL buffer object. Used by glDeleteBuffers(). - */ -static void -brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - assert(intel_obj); - - /* Buffer objects are automatically unmapped when deleting according - * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy - * (though it does if you call glDeleteBuffers) - */ - _mesa_buffer_unmap_all_mappings(ctx, obj); - - brw_bo_unreference(intel_obj->buffer); - _mesa_delete_buffer_object(ctx, obj); -} - - -/** - * The BufferData() driver hook. - * - * Implements glBufferData(), which recreates a buffer object's data store - * and populates it with the given data, if present. - * - * Any data that was previously stored in the buffer object is lost. - * - * \return true for success, false if out of memory - */ -static GLboolean -brw_buffer_data(struct gl_context *ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid *data, - GLenum usage, - GLbitfield storageFlags, - struct gl_buffer_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - /* Part of the ABI, but this function doesn't use it. 
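A note on the arithmetic above: 64 * 32 is the largest read 3DSTATE_CONSTANT_* can issue (64 registers of 256 bits each), so padding by that amount guarantees an out-of-bounds push never walks past the end of the BO. A minimal sketch of the size computation, with a hypothetical helper name:

   #include <stdbool.h>
   #include <stdint.h>

   /* Maximum push-constant read length: 64 registers of 256 bits (32
    * bytes) each, mirroring the pad applied in alloc_buffer_object().
    */
   #define MAX_PUSH_READ_BYTES (64 * 32)

   /* Hypothetical helper: bytes to allocate for a buffer of logical size
    * `size` when robust buffer access is enabled.  Out-of-bounds pushes
    * then land in the pad instead of faulting.
    */
   static uint64_t
   padded_bo_size(uint64_t size, bool robust_access)
   {
      return robust_access ? size + MAX_PUSH_READ_BYTES : size;
   }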
- */ - (void) target; - - intel_obj->Base.Size = size; - intel_obj->Base.Usage = usage; - intel_obj->Base.StorageFlags = storageFlags; - - assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */ - assert(!obj->Mappings[MAP_INTERNAL].Pointer); - - if (intel_obj->buffer != NULL) - release_buffer(intel_obj); - - if (size != 0) { - alloc_buffer_object(brw, intel_obj); - if (!intel_obj->buffer) - return false; - - if (data != NULL) { - brw_bo_subdata(intel_obj->buffer, 0, size, data); - mark_buffer_valid_data(intel_obj, 0, size); - } - } - - return true; -} - -static GLboolean -brw_buffer_data_mem(struct gl_context *ctx, - GLenum target, - GLsizeiptrARB size, - struct gl_memory_object *memObj, - GLuint64 offset, - GLenum usage, - struct gl_buffer_object *bufObj) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(bufObj); - struct brw_memory_object *intel_memObj = brw_memory_object(memObj); - - /* Part of the ABI, but this function doesn't use it. - */ - (void) target; - - intel_obj->Base.Size = size; - intel_obj->Base.Usage = usage; - intel_obj->Base.StorageFlags = 0; - - assert(!bufObj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */ - assert(!bufObj->Mappings[MAP_INTERNAL].Pointer); - - if (intel_obj->buffer != NULL) - release_buffer(intel_obj); - - if (size != 0) { - intel_obj->buffer = intel_memObj->bo; - mark_buffer_valid_data(intel_obj, offset, size); - } - - return true; -} - -/** - * The BufferSubData() driver hook. - * - * Implements glBufferSubData(), which replaces a portion of the data in a - * buffer object. - * - * If the data range specified by (size + offset) extends beyond the end of - * the buffer or if data is NULL, no copy is performed. - */ -static void -brw_buffer_subdata(struct gl_context *ctx, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid *data, - struct gl_buffer_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - bool busy; - - if (size == 0) - return; - - assert(intel_obj); - - /* See if we can unsynchronized write the data into the user's BO. This - * avoids GPU stalls in unfortunately common user patterns (uploading - * sequentially into a BO, with draw calls in between each upload). - * - * Once we've hit this path, we mark this GL BO as preferring stalling to - * blits, so that we can hopefully hit this path again in the future - * (otherwise, an app that might occasionally stall but mostly not will end - * up with blitting all the time, at the cost of bandwidth) - */ - if (offset + size <= intel_obj->gpu_active_start || - intel_obj->gpu_active_end <= offset || - offset + size <= intel_obj->valid_data_start || - intel_obj->valid_data_end <= offset) { - void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC); - memcpy(map + offset, data, size); - brw_bo_unmap(intel_obj->buffer); - - if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) - intel_obj->prefer_stall_to_blit = true; - - mark_buffer_valid_data(intel_obj, offset, size); - return; - } - - busy = - brw_bo_busy(intel_obj->buffer) || - brw_batch_references(&brw->batch, intel_obj->buffer); - - if (busy) { - if (size == intel_obj->Base.Size || - (intel_obj->valid_data_start >= offset && - intel_obj->valid_data_end <= offset + size)) { - /* Replace the current busy bo so the subdata doesn't stall. 
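The BufferData() path above is also what makes client-side "orphaning" cheap: handing glBufferData() a NULL pointer drops the old store without waiting on the GPU. A sketch of that application-side idiom, using only standard GL 1.5 entry points (this is client code, not part of the driver):

   #include <GL/gl.h>
   #include <GL/glext.h>

   /* Orphan a busy VBO: glBufferData(..., NULL, ...) asks the driver for
    * a fresh data store (brw_buffer_data above releases the old brw_bo
    * and allocates a new one), so the upload never stalls on in-flight
    * GPU work that still reads the old store.
    */
   static void
   upload_orphaned(GLuint vbo, GLsizeiptr size, const void *data)
   {
      glBindBuffer(GL_ARRAY_BUFFER, vbo);
      glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_STREAM_DRAW); /* orphan */
      glBufferSubData(GL_ARRAY_BUFFER, 0, size, data);           /* fill   */
   }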
*/ - brw_bo_unreference(intel_obj->buffer); - alloc_buffer_object(brw, intel_obj); - } else if (!intel_obj->prefer_stall_to_blit) { - perf_debug("Using a blit copy to avoid stalling on " - "glBufferSubData(%ld, %ld) (%ldkb) to a busy " - "(%d-%d) / valid (%d-%d) buffer object.\n", - (long)offset, (long)offset + size, (long)(size/1024), - intel_obj->gpu_active_start, - intel_obj->gpu_active_end, - intel_obj->valid_data_start, - intel_obj->valid_data_end); - struct brw_bo *temp_bo = - brw_bo_alloc(brw->bufmgr, "subdata temp", size, BRW_MEMZONE_OTHER); - - brw_bo_subdata(temp_bo, 0, size, data); - - brw_blorp_copy_buffers(brw, - temp_bo, 0, - intel_obj->buffer, offset, - size); - brw_emit_mi_flush(brw); - - brw_bo_unreference(temp_bo); - mark_buffer_valid_data(intel_obj, offset, size); - return; - } else { - perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy " - "(%d-%d) buffer object. Use glMapBufferRange() to " - "avoid this.\n", - (long)offset, (long)offset + size, (long)(size/1024), - intel_obj->gpu_active_start, - intel_obj->gpu_active_end); - brw_batch_flush(brw); - } - } - - brw_bo_subdata(intel_obj->buffer, offset, size, data); - mark_buffer_inactive(intel_obj); - mark_buffer_valid_data(intel_obj, offset, size); -} - -/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */ -typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); - -/** - * The GetBufferSubData() driver hook. - * - * Implements glGetBufferSubData(), which copies a subrange of a buffer - * object into user memory. - */ -static void -brw_get_buffer_subdata(struct gl_context *ctx, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid *data, - struct gl_buffer_object *obj) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - struct brw_context *brw = brw_context(ctx); - - assert(intel_obj); - if (brw_batch_references(&brw->batch, intel_obj->buffer)) { - brw_batch_flush(brw); - } - - unsigned int map_flags = MAP_READ; - mem_copy_fn memcpy_fn = memcpy; -#ifdef USE_SSE41 - if (!intel_obj->buffer->cache_coherent && cpu_has_sse4_1) { - /* Rather than acquire a new WB mmaping of the buffer object and pull - * it into the CPU cache, keep using the WC mmap that we have for writes, - * and use the magic movntd instructions instead. - */ - map_flags |= MAP_COHERENT; - memcpy_fn = (mem_copy_fn) _mesa_streaming_load_memcpy; - } -#endif - - void *map = brw_bo_map(brw, intel_obj->buffer, map_flags); - if (unlikely(!map)) { - _mesa_error_no_memory(__func__); - return; - } - memcpy_fn(data, map + offset, size); - brw_bo_unmap(intel_obj->buffer); - - mark_buffer_inactive(intel_obj); -} - - -/** - * The MapBufferRange() driver hook. - * - * This implements both glMapBufferRange() and glMapBuffer(). - * - * The goal of this extension is to allow apps to accumulate their rendering - * at the same time as they accumulate their buffer object. Without it, - * you'd end up blocking on execution of rendering every time you mapped - * the buffer to put new data in. - * - * We support it in 3 ways: If unsynchronized, then don't bother - * flushing the batchbuffer before mapping the buffer, which can save blocking - * in many cases. If we would still block, and they allow the whole buffer - * to be invalidated, then just allocate a new buffer to replace the old one. - * If not, and we'd block, and they allow the subrange of the buffer to be - * invalidated, then we can make a new little BO, let them write into that, - * and blit it into the real BO at unmap time. 
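Laid end to end, brw_buffer_subdata() above is a four-way decision. A schematic restatement under simplified assumptions (hypothetical names; ranges are half-open, and the valid-data test is folded into the single overlap test):

   #include <stdbool.h>
   #include <stdint.h>

   enum subdata_path {
      PATH_UNSYNC_WRITE,   /* no overlap with busy range: write in place */
      PATH_REPLACE_BO,     /* whole store rewritten: orphan the BO       */
      PATH_BLIT,           /* staging BO plus GPU copy                   */
      PATH_STALL,          /* flush and wait                             */
   };

   /* Hypothetical condensation of the heuristic above.  `busy` means the
    * BO is referenced by unflushed or executing GPU work.
    */
   static enum subdata_path
   choose_subdata_path(uint32_t off, uint32_t size, uint32_t buf_size,
                       uint32_t active_start, uint32_t active_end,
                       bool busy, bool prefer_stall)
   {
      if (off + size <= active_start || active_end <= off)
         return PATH_UNSYNC_WRITE;
      if (!busy)
         return PATH_UNSYNC_WRITE;
      if (size == buf_size)
         return PATH_REPLACE_BO;
      return prefer_stall ? PATH_STALL : PATH_BLIT;
   }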
- */ -static void * -brw_map_buffer_range(struct gl_context *ctx, - GLintptr offset, GLsizeiptr length, - GLbitfield access, struct gl_buffer_object *obj, - gl_map_buffer_index index) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - assert(intel_obj); - - STATIC_ASSERT(GL_MAP_UNSYNCHRONIZED_BIT == MAP_ASYNC); - STATIC_ASSERT(GL_MAP_WRITE_BIT == MAP_WRITE); - STATIC_ASSERT(GL_MAP_READ_BIT == MAP_READ); - STATIC_ASSERT(GL_MAP_PERSISTENT_BIT == MAP_PERSISTENT); - STATIC_ASSERT(GL_MAP_COHERENT_BIT == MAP_COHERENT); - assert((access & MAP_INTERNAL_MASK) == 0); - - /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also - * internally uses our functions directly. - */ - obj->Mappings[index].Offset = offset; - obj->Mappings[index].Length = length; - obj->Mappings[index].AccessFlags = access; - - if (intel_obj->buffer == NULL) { - obj->Mappings[index].Pointer = NULL; - return NULL; - } - - /* If the access is synchronized (like a normal buffer mapping), then get - * things flushed out so the later mapping syncs appropriately through GEM. - * If the user doesn't care about existing buffer contents and mapping would - * cause us to block, then throw out the old buffer. - * - * If they set INVALIDATE_BUFFER, we can pitch the current contents to - * achieve the required synchronization. - */ - if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { - if (brw_batch_references(&brw->batch, intel_obj->buffer)) { - if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { - brw_bo_unreference(intel_obj->buffer); - alloc_buffer_object(brw, intel_obj); - } else { - perf_debug("Stalling on the GPU for mapping a busy buffer " - "object\n"); - brw_batch_flush(brw); - } - } else if (brw_bo_busy(intel_obj->buffer) && - (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { - brw_bo_unreference(intel_obj->buffer); - alloc_buffer_object(brw, intel_obj); - } - } - - if (access & MAP_WRITE) - mark_buffer_valid_data(intel_obj, offset, length); - - /* If the user is mapping a range of an active buffer object but - * doesn't require the current contents of that range, make a new - * BO, and we'll copy what they put in there out at unmap or - * FlushRange time. - * - * That is, unless they're looking for a persistent mapping -- we would - * need to do blits in the MemoryBarrier call, and it's easier to just do a - * GPU stall and do a mapping. - */ - if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) && - (access & GL_MAP_INVALIDATE_RANGE_BIT) && - brw_bo_busy(intel_obj->buffer)) { - /* Ensure that the base alignment of the allocation meets the alignment - * guarantees the driver has advertised to the application. - */ - const unsigned alignment = ctx->Const.MinMapBufferAlignment; - - intel_obj->map_extra[index] = (uintptr_t) offset % alignment; - intel_obj->range_map_bo[index] = - brw_bo_alloc(brw->bufmgr, "BO blit temp", - length + intel_obj->map_extra[index], - BRW_MEMZONE_OTHER); - void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access); - obj->Mappings[index].Pointer = map + intel_obj->map_extra[index]; - return obj->Mappings[index].Pointer; - } - - void *map = brw_bo_map(brw, intel_obj->buffer, access); - if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { - mark_buffer_inactive(intel_obj); - } - - obj->Mappings[index].Pointer = map + offset; - return obj->Mappings[index].Pointer; -} - -/** - * The FlushMappedBufferRange() driver hook. 
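The mapping policy described above reduces to a small decision table. A condensed sketch, assuming GL 3.x header definitions for the GL_MAP_* bits and a hypothetical chooser function:

   #include <stdbool.h>
   #include <GL/gl.h>
   #include <GL/glext.h>

   enum map_path {
      MAP_DIRECT_UNSYNC,   /* map now, no synchronization            */
      MAP_ORPHAN,          /* replace the BO, map the fresh one      */
      MAP_TEMP_BO,         /* map a staging BO, blit back at unmap   */
      MAP_STALL_DIRECT,    /* flush, wait, then map the real BO      */
   };

   /* Hypothetical condensation of brw_map_buffer_range()'s policy.
    * `busy` means the BO is referenced by unflushed or executing work.
    */
   static enum map_path
   choose_map_path(GLbitfield access, bool busy)
   {
      if (access & GL_MAP_UNSYNCHRONIZED_BIT)
         return MAP_DIRECT_UNSYNC;              /* caller takes the risk */
      if (busy && (access & GL_MAP_INVALIDATE_BUFFER_BIT))
         return MAP_ORPHAN;
      if (busy && (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
          !(access & GL_MAP_PERSISTENT_BIT))
         return MAP_TEMP_BO;
      return MAP_STALL_DIRECT;  /* the wait is a no-op if the BO is idle */
   }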
- *
- * Implements glFlushMappedBufferRange(), which signifies that modifications
- * have been made to a range of a mapped buffer, and it should be flushed.
- *
- * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
- *
- * Ideally we'd use a BO to avoid taking up cache space for the temporary
- * data, but FlushMappedBufferRange may be followed by further writes to
- * the pointer, so we would have to re-map after emitting our blit, which
- * would defeat the point.
- */
-static void
-brw_flush_mapped_buffer_range(struct gl_context *ctx,
-                              GLintptr offset, GLsizeiptr length,
-                              struct gl_buffer_object *obj,
-                              gl_map_buffer_index index)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
-   assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT);
-
-   /* If we gave a direct mapping of the buffer instead of using a temporary,
-    * then there's nothing to do.
-    */
-   if (intel_obj->range_map_bo[index] == NULL)
-      return;
-
-   if (length == 0)
-      return;
-
-   /* Note that we're not unmapping our buffer while executing the blit.  We
-    * need to have a mapping still at the end of this call, since the user
-    * gets to make further modifications and glFlushMappedBufferRange() calls.
-    * This is safe, because:
-    *
-    * - On LLC platforms, we're using a CPU mapping that's coherent with the
-    *   GPU (except for the render caches), so the kernel doesn't need to do
-    *   any flushing work for us except for what happens at batch exec time
-    *   anyway.
-    *
-    * - On non-LLC platforms, we're using a GTT mapping that writes directly
-    *   to system memory (except for the chipset cache that gets flushed at
-    *   batch exec time).
-    *
-    * In both cases we don't need to stall for the previous blit to complete
-    * so we can re-map (and we definitely don't want to, since that would be
-    * slow): If the user edits a part of their buffer that's previously been
-    * blitted, then our lack of synchronization is fine, because either
-    * they'll get some too-new data in the first blit and not do another blit
-    * of that area (but in that case the results are undefined), or they'll do
-    * another blit of that area and the complete newer data will land the
-    * second time.
-    */
-   brw_blorp_copy_buffers(brw,
-                          intel_obj->range_map_bo[index],
-                          intel_obj->map_extra[index] + offset,
-                          intel_obj->buffer,
-                          obj->Mappings[index].Offset + offset,
-                          length);
-   mark_buffer_gpu_usage(intel_obj,
-                         obj->Mappings[index].Offset + offset,
-                         length);
-   brw_emit_mi_flush(brw);
-}
-
-
-/**
- * The UnmapBuffer() driver hook.
- *
- * Implements glUnmapBuffer().
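The offset bookkeeping in the flush-range blit above is the subtle part: the temporary BO is over-allocated by map_extra bytes of alignment slack, so both copy offsets must be shifted. A trivial restatement of that arithmetic (hypothetical struct and function names):

   #include <stdint.h>

   /* Hypothetical restatement of the blit offsets used above.  The temp
    * BO holds `map_extra` slack bytes followed by the mapped range; the
    * real BO holds the range at the mapping's Offset.  Flushing the
    * sub-range [offset, offset+length) of the mapping therefore copies
    * from map_extra + offset to mapping_offset + offset.
    */
   struct flush_offsets { uint64_t src, dst; };

   static struct flush_offsets
   flush_range_offsets(uint64_t map_extra, uint64_t mapping_offset,
                       uint64_t offset)
   {
      return (struct flush_offsets) {
         .src = map_extra + offset,
         .dst = mapping_offset + offset,
      };
   }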
- */ -static GLboolean -brw_unmap_buffer(struct gl_context *ctx, - struct gl_buffer_object *obj, - gl_map_buffer_index index) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - assert(intel_obj); - assert(obj->Mappings[index].Pointer); - if (intel_obj->range_map_bo[index] != NULL) { - brw_bo_unmap(intel_obj->range_map_bo[index]); - - if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) { - brw_blorp_copy_buffers(brw, - intel_obj->range_map_bo[index], - intel_obj->map_extra[index], - intel_obj->buffer, obj->Mappings[index].Offset, - obj->Mappings[index].Length); - mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, - obj->Mappings[index].Length); - brw_emit_mi_flush(brw); - } - - /* Since we've emitted some blits to buffers that will (likely) be used - * in rendering operations in other cache domains in this batch, emit a - * flush. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer. - */ - - brw_bo_unreference(intel_obj->range_map_bo[index]); - intel_obj->range_map_bo[index] = NULL; - } else if (intel_obj->buffer != NULL) { - brw_bo_unmap(intel_obj->buffer); - } - obj->Mappings[index].Pointer = NULL; - obj->Mappings[index].Offset = 0; - obj->Mappings[index].Length = 0; - - return true; -} - -/** - * Gets a pointer to the object's BO, and marks the given range as being used - * on the GPU. - * - * Anywhere that uses buffer objects in the pipeline should be using this to - * mark the range of the buffer that is being accessed by the pipeline. - */ -struct brw_bo * -brw_bufferobj_buffer(struct brw_context *brw, - struct brw_buffer_object *intel_obj, - uint32_t offset, uint32_t size, bool write) -{ - /* This is needed so that things like transform feedback and texture buffer - * objects that need a BO but don't want to check that they exist for - * draw-time validation can just always get a BO from a GL buffer object. - */ - if (intel_obj->buffer == NULL) - alloc_buffer_object(brw, intel_obj); - - mark_buffer_gpu_usage(intel_obj, offset, size); - - /* If writing, (conservatively) mark this section as having valid data. */ - if (write) - mark_buffer_valid_data(intel_obj, offset, size); - - return intel_obj->buffer; -} - -/** - * The CopyBufferSubData() driver hook. - * - * Implements glCopyBufferSubData(), which copies a portion of one buffer - * object's data to another. Independent source and destination offsets - * are allowed. - */ -static void -brw_copy_buffer_subdata(struct gl_context *ctx, - struct gl_buffer_object *src, - struct gl_buffer_object *dst, - GLintptr read_offset, GLintptr write_offset, - GLsizeiptr size) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_src = brw_buffer_object(src); - struct brw_buffer_object *intel_dst = brw_buffer_object(dst); - struct brw_bo *src_bo, *dst_bo; - - if (size == 0) - return; - - dst_bo = brw_bufferobj_buffer(brw, intel_dst, write_offset, size, true); - src_bo = brw_bufferobj_buffer(brw, intel_src, read_offset, size, false); - - brw_blorp_copy_buffers(brw, - src_bo, read_offset, - dst_bo, write_offset, size); - - /* Since we've emitted some blits to buffers that will (likely) be used - * in rendering operations in other cache domains in this batch, emit a - * flush. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer. 
- */ - brw_emit_mi_flush(brw); -} - -void -brw_init_buffer_object_functions(struct dd_function_table *functions) -{ - functions->NewBufferObject = brw_new_buffer_object; - functions->DeleteBuffer = brw_delete_buffer; - functions->BufferData = brw_buffer_data; - functions->BufferDataMem = brw_buffer_data_mem; - functions->BufferSubData = brw_buffer_subdata; - functions->GetBufferSubData = brw_get_buffer_subdata; - functions->MapBufferRange = brw_map_buffer_range; - functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range; - functions->UnmapBuffer = brw_unmap_buffer; - functions->CopyBufferSubData = brw_copy_buffer_subdata; -} diff --git a/src/mesa/drivers/dri/i965/brw_buffer_objects.h b/src/mesa/drivers/dri/i965/brw_buffer_objects.h deleted file mode 100644 index 3ed0930..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffer_objects.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2005 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_BUFFEROBJ_H -#define BRW_BUFFEROBJ_H - -#include "main/mtypes.h" - -struct brw_context; -struct gl_buffer_object; - - -/** - * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. - */ -struct brw_buffer_object -{ - struct gl_buffer_object Base; - struct brw_bo *buffer; /* the low-level buffer manager's buffer handle */ - - struct brw_bo *range_map_bo[MAP_COUNT]; - - /** - * Alignment offset from the range_map_bo temporary mapping to the returned - * obj->Pointer (caused by GL_ARB_map_buffer_alignment). - */ - unsigned map_extra[MAP_COUNT]; - - /** @{ - * Tracking for what range of the BO may currently be in use by the GPU. - * - * Users often want to either glBufferSubData() or glMapBufferRange() a - * buffer object where some subset of it is busy on the GPU, without either - * stalling or doing an extra blit (since our blits are extra expensive, - * given that we have to reupload most of the 3D state when switching - * rings). We wish they'd just use glMapBufferRange() with the - * UNSYNC|INVALIDATE_RANGE flag or the INVALIDATE_BUFFER flag, but lots - * don't. - * - * To work around apps, we track what range of the BO we might have used on - * the GPU as vertex data, tranform feedback output, buffer textures, etc., - * and just do glBufferSubData() with an unsynchronized map when they're - * outside of that range. 
- * - * If gpu_active_start > gpu_active_end, then the GPU is not currently - * accessing the BO (and we can map it without synchronization). - */ - uint32_t gpu_active_start; - uint32_t gpu_active_end; - - /** @{ - * Tracking for what range of the BO may contain valid data. - * - * Users may create a large buffer object and only fill part of it - * with valid data. This is a conservative estimate of what part - * of the buffer contains valid data that we have to preserve. - */ - uint32_t valid_data_start; - uint32_t valid_data_end; - /** @} */ - - /** - * If we've avoided stalls/blits using the active tracking, flag the buffer - * for (occasional) stalling in the future to avoid getting stuck in a - * cycle of blitting on buffer wraparound. - */ - bool prefer_stall_to_blit; - /** @} */ -}; - - -/* Get the bm buffer associated with a GL bufferobject: - */ -struct brw_bo *brw_bufferobj_buffer(struct brw_context *brw, - struct brw_buffer_object *obj, - uint32_t offset, - uint32_t size, - bool write); - -void brw_upload_data(struct brw_uploader *upload, - const void *data, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset); - -void *brw_upload_space(struct brw_uploader *upload, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset); - -void brw_upload_finish(struct brw_uploader *upload); -void brw_upload_init(struct brw_uploader *upload, - struct brw_bufmgr *bufmgr, - unsigned default_size); - -/* Hook the bufferobject implementation into mesa: - */ -void brw_init_buffer_object_functions(struct dd_function_table *functions); - -static inline struct brw_buffer_object * -brw_buffer_object(struct gl_buffer_object *obj) -{ - return (struct brw_buffer_object *) obj; -} - -struct brw_memory_object { - struct gl_memory_object Base; - struct brw_bo *bo; -}; - -static inline struct brw_memory_object * -brw_memory_object(struct gl_memory_object *obj) -{ - return (struct brw_memory_object *)obj; -} - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_buffers.c b/src/mesa/drivers/dri/i965/brw_buffers.c deleted file mode 100644 index 55b6925..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffers.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
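The inverted-range convention documented above (start > end means "idle") is what lets the glBufferSubData() path test for overlap with a single pair of comparisons. A self-contained sketch of the tracking and the safety test, with hypothetical names:

   #include <stdbool.h>
   #include <stdint.h>

   /* Hypothetical condensation of the gpu_active_start/end tracking: the
    * range starts inverted (~0, 0), meaning the GPU is not using the BO.
    */
   struct busy_range { uint32_t start, end; };

   static void
   range_mark(struct busy_range *r, uint32_t off, uint32_t size)
   {
      r->start = off < r->start ? off : r->start;
      r->end = off + size > r->end ? off + size : r->end;
   }

   /* A CPU write to [off, off+size) needs no synchronization iff it does
    * not overlap the busy range; an inverted range overlaps nothing.
    */
   static bool
   range_write_is_safe(const struct busy_range *r,
                       uint32_t off, uint32_t size)
   {
      return off + size <= r->start || r->end <= off;
   }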
- */ - -#include "brw_context.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" - -static void -brw_drawbuffer(struct gl_context *ctx) -{ - if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) { - struct brw_context *const brw = brw_context(ctx); - - /* If we might be front-buffer rendering on this buffer for the first - * time, invalidate our DRI drawable so we'll ask for new buffers - * (including the fake front) before we start rendering again. - */ - if (brw->driContext->driDrawablePriv) - dri2InvalidateDrawable(brw->driContext->driDrawablePriv); - brw_prepare_render(brw); - } -} - - -static void -brw_readbuffer(struct gl_context * ctx, GLenum mode) -{ - if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) { - struct brw_context *const brw = brw_context(ctx); - - /* If we might be front-buffer reading on this buffer for the first - * time, invalidate our DRI drawable so we'll ask for new buffers - * (including the fake front) before we start reading again. - */ - if (brw->driContext->driDrawablePriv) - dri2InvalidateDrawable(brw->driContext->driReadablePriv); - brw_prepare_render(brw); - } -} - - -void -brw_init_buffer_functions(struct dd_function_table *functions) -{ - functions->DrawBuffer = brw_drawbuffer; - functions->ReadBuffer = brw_readbuffer; -} diff --git a/src/mesa/drivers/dri/i965/brw_buffers.h b/src/mesa/drivers/dri/i965/brw_buffers.h deleted file mode 100644 index 37c385f..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffers.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_BUFFERS_H -#define BRW_BUFFERS_H - -#include "dri_util.h" -#include "drm-uapi/drm.h" -#include "brw_context.h" - -extern void brw_init_buffer_functions(struct dd_function_table *functions); - -#endif /* BRW_BUFFERS_H */ diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c deleted file mode 100644 index b62d213..0000000 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ /dev/null @@ -1,1967 +0,0 @@ -/* - * Copyright © 2007 Red Hat Inc. - * Copyright © 2007-2017 Intel Corporation - * Copyright © 2006 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - * Dave Airlie - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "errno.h" -#include "common/intel_clflush.h" -#include "dev/intel_debug.h" -#include "common/intel_gem.h" -#include "dev/intel_device_info.h" -#include "libdrm_macros.h" -#include "main/macros.h" -#include "util/macros.h" -#include "util/hash_table.h" -#include "util/list.h" -#include "util/os_file.h" -#include "util/u_dynarray.h" -#include "util/vma.h" -#include "brw_bufmgr.h" -#include "brw_context.h" -#include "string.h" - -#include "drm-uapi/i915_drm.h" - -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#else -#define VG(x) -#endif - -/* Bufmgr is not aware of brw_context. */ -#undef WARN_ONCE -#define WARN_ONCE(cond, fmt...) do { \ - if (unlikely(cond)) { \ - static bool _warned = false; \ - if (!_warned) { \ - fprintf(stderr, "WARNING: "); \ - fprintf(stderr, fmt); \ - _warned = true; \ - } \ - } \ -} while (0) - - -/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier - * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is - * leaked. All because it does not call VG(cli_free) from its - * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like - * and allocation, we mark it available for use upon mmapping and remove - * it upon unmapping. - */ -#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size)) -#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size)) - -/* On FreeBSD PAGE_SIZE is already defined in - * /usr/include/machine/param.h that is indirectly - * included here. - */ -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif - -#define FILE_DEBUG_FLAG DEBUG_BUFMGR - -static inline int -atomic_add_unless(int *v, int add, int unless) -{ - int c, old; - c = p_atomic_read(v); - while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c) - c = old; - return c == unless; -} - -/** - * i965 fixed-size bucketing VMA allocator. - * - * The BO cache maintains "cache buckets" for buffers of various sizes. - * All buffers in a given bucket are identically sized - when allocating, - * we always round up to the bucket size. 
This means that virtually all - * allocations are fixed-size; only buffers which are too large to fit in - * a bucket can be variably-sized. - * - * We create an allocator for each bucket. Each contains a free-list, where - * each node contains a pair. Each bit - * represents a bucket-sized block of memory. (At the first level, each - * bit corresponds to a page. For the second bucket, bits correspond to - * two pages, and so on.) 1 means a block is free, and 0 means it's in-use. - * The lowest bit in the bitmap is for the first block. - * - * This makes allocations cheap - any bit of any node will do. We can pick - * the head of the list and use ffs() to find a free block. If there are - * none, we allocate 64 blocks from a larger allocator - either a bigger - * bucketing allocator, or a fallback top-level allocator for large objects. - */ -struct vma_bucket_node { - uint64_t start_address; - uint64_t bitmap; -}; - -struct bo_cache_bucket { - /** List of cached BOs. */ - struct list_head head; - - /** Size of this bucket, in bytes. */ - uint64_t size; - - /** List of vma_bucket_nodes. */ - struct util_dynarray vma_list[BRW_MEMZONE_COUNT]; -}; - -struct bo_export { - /** File descriptor associated with a handle export. */ - int drm_fd; - - /** GEM handle in drm_fd */ - uint32_t gem_handle; - - struct list_head link; -}; - -struct brw_bufmgr { - uint32_t refcount; - - struct list_head link; - - int fd; - - mtx_t lock; - - /** Array of lists of cached gem objects of power-of-two sizes */ - struct bo_cache_bucket cache_bucket[14 * 4]; - int num_buckets; - time_t time; - - struct hash_table *name_table; - struct hash_table *handle_table; - - struct util_vma_heap vma_allocator[BRW_MEMZONE_COUNT]; - - bool has_llc:1; - bool has_mmap_wc:1; - bool has_mmap_offset:1; - bool bo_reuse:1; - - uint64_t initial_kflags; -}; - -static mtx_t global_bufmgr_list_mutex = _MTX_INITIALIZER_NP; -static struct list_head global_bufmgr_list = { - .next = &global_bufmgr_list, - .prev = &global_bufmgr_list, -}; - -static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode, - uint32_t stride); - -static void bo_free(struct brw_bo *bo); - -static uint64_t vma_alloc(struct brw_bufmgr *bufmgr, - enum brw_memory_zone memzone, - uint64_t size, uint64_t alignment); - -static struct brw_bo * -hash_find_bo(struct hash_table *ht, unsigned int key) -{ - struct hash_entry *entry = _mesa_hash_table_search(ht, &key); - return entry ? (struct brw_bo *) entry->data : NULL; -} - -static uint64_t -bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling) -{ - if (tiling == I915_TILING_NONE) - return size; - - /* 965+ just need multiples of page size for tiling */ - return ALIGN(size, PAGE_SIZE); -} - -/* - * Round a given pitch up to the minimum required for X tiling on a - * given chip. We use 512 as the minimum to allow for a later tiling - * change. - */ -static uint32_t -bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) -{ - unsigned long tile_width; - - /* If untiled, then just align it so that we can do rendering - * to it with the 3D engine. - */ - if (tiling == I915_TILING_NONE) - return ALIGN(pitch, 64); - - if (tiling == I915_TILING_X) - tile_width = 512; - else - tile_width = 128; - - /* 965 is flexible */ - return ALIGN(pitch, tile_width); -} - -/** - * This function finds the correct bucket fit for the input size. - * The function works with O(1) complexity when the requested size - * was queried instead of iterating the size through all the buckets. 
- */ -static struct bo_cache_bucket * -bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) -{ - /* Calculating the pages and rounding up to the page size. */ - const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - - /* Row Bucket sizes clz((x-1) | 3) Row Column - * in pages stride size - * 0: 1 2 3 4 -> 30 30 30 30 4 1 - * 1: 5 6 7 8 -> 29 29 29 29 4 1 - * 2: 10 12 14 16 -> 28 28 28 28 8 2 - * 3: 20 24 28 32 -> 27 27 27 27 16 4 - */ - const unsigned row = 30 - __builtin_clz((pages - 1) | 3); - const unsigned row_max_pages = 4 << row; - - /* The '& ~2' is the special case for row 1. In row 1, max pages / - * 2 is 2, but the previous row maximum is zero (because there is - * no previous row). All row maximum sizes are power of 2, so that - * is the only case where that bit will be set. - */ - const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; - int col_size_log2 = row - 1; - col_size_log2 += (col_size_log2 < 0); - - const unsigned col = (pages - prev_row_max_pages + - ((1 << col_size_log2) - 1)) >> col_size_log2; - - /* Calculating the index based on the row and column. */ - const unsigned index = (row * 4) + (col - 1); - - return (index < bufmgr->num_buckets) ? - &bufmgr->cache_bucket[index] : NULL; -} - -static enum brw_memory_zone -memzone_for_address(uint64_t address) -{ - const uint64_t _4GB = 1ull << 32; - - if (address >= _4GB) - return BRW_MEMZONE_OTHER; - - return BRW_MEMZONE_LOW_4G; -} - -static uint64_t -bucket_vma_alloc(struct brw_bufmgr *bufmgr, - struct bo_cache_bucket *bucket, - enum brw_memory_zone memzone) -{ - struct util_dynarray *vma_list = &bucket->vma_list[memzone]; - struct vma_bucket_node *node; - - if (vma_list->size == 0) { - /* This bucket allocator is out of space - allocate a new block of - * memory for 64 blocks from a larger allocator (either a larger - * bucket or util_vma). - * - * We align the address to the node size (64 blocks) so that - * bucket_vma_free can easily compute the starting address of this - * block by rounding any address we return down to the node size. - * - * Set the first bit used, and return the start address. - */ - uint64_t node_size = 64ull * bucket->size; - node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1); - - if (unlikely(!node)) - return 0ull; - - uint64_t addr = vma_alloc(bufmgr, memzone, node_size, node_size); - node->start_address = intel_48b_address(addr); - node->bitmap = ~1ull; - return node->start_address; - } - - /* Pick any bit from any node - they're all the right size and free. */ - node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node); - int bit = ffsll(node->bitmap) - 1; - assert(bit >= 0 && bit <= 63); - - /* Reserve the memory by clearing the bit. */ - assert((node->bitmap & (1ull << bit)) != 0ull); - node->bitmap &= ~(1ull << bit); - - uint64_t addr = node->start_address + bit * bucket->size; - - /* If this node is now completely full, remove it from the free list. */ - if (node->bitmap == 0ull) { - (void) util_dynarray_pop(vma_list, struct vma_bucket_node); - } - - return addr; -} - -static void -bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address) -{ - enum brw_memory_zone memzone = memzone_for_address(address); - struct util_dynarray *vma_list = &bucket->vma_list[memzone]; - const uint64_t node_bytes = 64ull * bucket->size; - struct vma_bucket_node *node = NULL; - - /* bucket_vma_alloc allocates 64 blocks at a time, and aligns it to - * that 64 block size. So, we can round down to get the starting address. 
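The clz() row/column arithmetic above rewards a worked check. A standalone sketch that recomputes bucket indices from a page count using the same math (GCC-style __builtin_clz, as the original relies on; hypothetical function name):

   #include <stdio.h>

   /* Recompute the bucket index from a page count with the same math as
    * bucket_for_size() above: rows of four buckets, row N covering up to
    * 4 << N pages, columns stepping by max(row size / 4, 1) pages.
    */
   static unsigned
   bucket_index_for_pages(unsigned pages)
   {
      const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
      const unsigned prev_row_max_pages = ((4 << row) / 2) & ~2;
      int col_size_log2 = row - 1;
      col_size_log2 += (col_size_log2 < 0);
      const unsigned col = (pages - prev_row_max_pages +
                            ((1u << col_size_log2) - 1)) >> col_size_log2;
      return row * 4 + (col - 1);
   }

   int
   main(void)
   {
      /* Prints indices 0..7 for 1..8 pages, matching the table above. */
      for (unsigned pages = 1; pages <= 8; pages++)
         printf("%u pages -> bucket %u\n", pages,
                bucket_index_for_pages(pages));
      return 0;
   }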
- */ - uint64_t start = (address / node_bytes) * node_bytes; - - /* Dividing the offset from start by bucket size gives us the bit index. */ - int bit = (address - start) / bucket->size; - - assert(start + bit * bucket->size == address); - - util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) { - if (cur->start_address == start) { - node = cur; - break; - } - } - - if (!node) { - /* No node - the whole group of 64 blocks must have been in-use. */ - node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1); - - if (unlikely(!node)) - return; /* bogus, leaks some GPU VMA, but nothing we can do... */ - - node->start_address = start; - node->bitmap = 0ull; - } - - /* Set the bit to return the memory. */ - assert((node->bitmap & (1ull << bit)) == 0ull); - node->bitmap |= 1ull << bit; - - /* The block might be entirely free now, and if so, we could return it - * to the larger allocator. But we may as well hang on to it, in case - * we get more allocations at this block size. - */ -} - -static struct bo_cache_bucket * -get_bucket_allocator(struct brw_bufmgr *bufmgr, uint64_t size) -{ - /* Skip using the bucket allocator for very large sizes, as it allocates - * 64 of them and this can balloon rather quickly. - */ - if (size > 1024 * PAGE_SIZE) - return NULL; - - struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size); - - if (bucket && bucket->size == size) - return bucket; - - return NULL; -} - -/** - * Allocate a section of virtual memory for a buffer, assigning an address. - * - * This uses either the bucket allocator for the given size, or the large - * object allocator (util_vma). - */ -static uint64_t -vma_alloc(struct brw_bufmgr *bufmgr, - enum brw_memory_zone memzone, - uint64_t size, - uint64_t alignment) -{ - /* Without softpin support, we let the kernel assign addresses. */ - assert(brw_using_softpin(bufmgr)); - - alignment = ALIGN(alignment, PAGE_SIZE); - - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); - uint64_t addr; - - if (bucket) { - addr = bucket_vma_alloc(bufmgr, bucket, memzone); - } else { - addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, - alignment); - } - - assert((addr >> 48ull) == 0); - assert((addr % alignment) == 0); - - return intel_canonical_address(addr); -} - -/** - * Free a virtual memory area, allowing the address to be reused. - */ -static void -vma_free(struct brw_bufmgr *bufmgr, - uint64_t address, - uint64_t size) -{ - assert(brw_using_softpin(bufmgr)); - - /* Un-canonicalize the address. 
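Each vma_bucket_node above is effectively a 64-slot bitmap allocator. A self-contained sketch of the two bit operations involved (allocation via ffsll, as the original uses, and freeing by setting the bit back; hypothetical names, GNU ffsll assumed):

   #include <assert.h>
   #include <stdint.h>
   #include <strings.h>

   /* Hypothetical 64-slot bitmap allocator mirroring vma_bucket_node:
    * bit N set means block N (at base + N * block_size) is free.
    */
   struct bitmap_node { uint64_t base, bitmap; };

   static uint64_t
   node_alloc(struct bitmap_node *n, uint64_t block_size)
   {
      assert(n->bitmap != 0);           /* caller checks for a free slot */
      int bit = ffsll(n->bitmap) - 1;   /* lowest free block             */
      n->bitmap &= ~(1ull << bit);      /* claim it                      */
      return n->base + (uint64_t)bit * block_size;
   }

   static void
   node_free(struct bitmap_node *n, uint64_t addr, uint64_t block_size)
   {
      int bit = (addr - n->base) / block_size;
      assert((n->bitmap & (1ull << bit)) == 0);   /* must be in use */
      n->bitmap |= 1ull << bit;
   }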
*/ - address = intel_48b_address(address); - - if (address == 0ull) - return; - - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); - - if (bucket) { - bucket_vma_free(bucket, address); - } else { - enum brw_memory_zone memzone = memzone_for_address(address); - util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); - } -} - -int -brw_bo_busy(struct brw_bo *bo) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - struct drm_i915_gem_busy busy = { .handle = bo->gem_handle }; - - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); - if (ret == 0) { - bo->idle = !busy.busy; - return busy.busy; - } - return false; -} - -int -brw_bo_madvise(struct brw_bo *bo, int state) -{ - struct drm_i915_gem_madvise madv = { - .handle = bo->gem_handle, - .madv = state, - .retained = 1, - }; - - drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); - - return madv.retained; -} - -/* drop the oldest entries that have been purged by the kernel */ -static void -brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr, - struct bo_cache_bucket *bucket) -{ - list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) { - if (brw_bo_madvise(bo, I915_MADV_DONTNEED)) - break; - - list_del(&bo->head); - bo_free(bo); - } -} - -static struct brw_bo * -bo_calloc(void) -{ - struct brw_bo *bo = calloc(1, sizeof(*bo)); - if (!bo) - return NULL; - - list_inithead(&bo->exports); - - return bo; -} - -static struct brw_bo * -bo_alloc_internal(struct brw_bufmgr *bufmgr, - const char *name, - uint64_t size, - enum brw_memory_zone memzone, - unsigned flags, - uint32_t tiling_mode, - uint32_t stride) -{ - struct brw_bo *bo; - int ret; - struct bo_cache_bucket *bucket; - bool alloc_from_cache; - uint64_t bo_size; - bool busy = false; - bool zeroed = false; - - if (flags & BO_ALLOC_BUSY) - busy = true; - - if (flags & BO_ALLOC_ZEROED) - zeroed = true; - - /* BUSY does doesn't really jive with ZEROED as we have to wait for it to - * be idle before we can memset. Just disallow that combination. - */ - assert(!(busy && zeroed)); - - /* Round the allocated size up to a power of two number of pages. */ - bucket = bucket_for_size(bufmgr, size); - - /* If we don't have caching at this size, don't actually round the - * allocation up. - */ - if (bucket == NULL) { - unsigned int page_size = getpagesize(); - bo_size = size == 0 ? page_size : ALIGN(size, page_size); - } else { - bo_size = bucket->size; - } - assert(bo_size); - - mtx_lock(&bufmgr->lock); - /* Get a buffer out of the cache if available */ -retry: - alloc_from_cache = false; - if (bucket != NULL && !list_is_empty(&bucket->head)) { - if (busy && !zeroed) { - /* Allocate new render-target BOs from the tail (MRU) - * of the list, as it will likely be hot in the GPU - * cache and in the aperture for us. If the caller - * asked us to zero the buffer, we don't want this - * because we are going to mmap it. - */ - bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head); - list_del(&bo->head); - alloc_from_cache = true; - } else { - /* For non-render-target BOs (where we're probably - * going to map it first thing in order to fill it - * with data), check if the last BO in the cache is - * unbusy, and only reuse in that case. Otherwise, - * allocating a new buffer is probably faster than - * waiting for the GPU to finish. 
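The canonicalization mentioned around vma_alloc()/vma_free() is ordinary sign extension: with a 48-bit GPU address space, bits 63:48 must replicate bit 47. A sketch of both directions under that assumption (hypothetical function names):

   #include <stdint.h>

   /* Sign-extend a 48-bit GPU address so bits 63:48 copy bit 47, which
    * is what intel_canonical_address() is used for above; assumes a
    * 48-bit virtual address space.
    */
   static uint64_t
   to_canonical(uint64_t addr)
   {
      return (uint64_t)((int64_t)(addr << 16) >> 16);
   }

   /* Drop the sign extension again ("un-canonicalize", as in vma_free). */
   static uint64_t
   to_48b(uint64_t addr)
   {
      return addr & ((1ull << 48) - 1);
   }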
- */ - bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head); - if (!brw_bo_busy(bo)) { - alloc_from_cache = true; - list_del(&bo->head); - } - } - - if (alloc_from_cache) { - assert(list_is_empty(&bo->exports)); - if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) { - bo_free(bo); - brw_bo_cache_purge_bucket(bufmgr, bucket); - goto retry; - } - - if (bo_set_tiling_internal(bo, tiling_mode, stride)) { - bo_free(bo); - goto retry; - } - - if (zeroed) { - void *map = brw_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); - if (!map) { - bo_free(bo); - goto retry; - } - memset(map, 0, bo_size); - } - } - } - - if (alloc_from_cache) { - /* If the cache BO isn't in the right memory zone, free the old - * memory and assign it a new address. - */ - if ((bo->kflags & EXEC_OBJECT_PINNED) && - memzone != memzone_for_address(bo->gtt_offset)) { - vma_free(bufmgr, bo->gtt_offset, bo->size); - bo->gtt_offset = 0ull; - } - } else { - bo = bo_calloc(); - if (!bo) - goto err; - - bo->size = bo_size; - bo->idle = true; - - struct drm_i915_gem_create create = { .size = bo_size }; - - /* All new BOs we get from the kernel are zeroed, so we don't need to - * worry about that here. - */ - ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create); - if (ret != 0) { - free(bo); - goto err; - } - - bo->gem_handle = create.handle; - - bo->bufmgr = bufmgr; - - bo->tiling_mode = I915_TILING_NONE; - bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - bo->stride = 0; - - if (bo_set_tiling_internal(bo, tiling_mode, stride)) - goto err_free; - - /* Calling set_domain() will allocate pages for the BO outside of the - * struct mutex lock in the kernel, which is more efficient than waiting - * to create them during the first execbuf that uses the BO. - */ - struct drm_i915_gem_set_domain sd = { - .handle = bo->gem_handle, - .read_domains = I915_GEM_DOMAIN_CPU, - .write_domain = 0, - }; - - if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) - goto err_free; - } - - bo->name = name; - p_atomic_set(&bo->refcount, 1); - bo->reusable = true; - bo->cache_coherent = bufmgr->has_llc; - bo->index = -1; - bo->kflags = bufmgr->initial_kflags; - - if ((bo->kflags & EXEC_OBJECT_PINNED) && bo->gtt_offset == 0ull) { - bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1); - - if (bo->gtt_offset == 0ull) - goto err_free; - } - - mtx_unlock(&bufmgr->lock); - - DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name, - (unsigned long long) size); - - return bo; - -err_free: - bo_free(bo); -err: - mtx_unlock(&bufmgr->lock); - return NULL; -} - -struct brw_bo * -brw_bo_alloc(struct brw_bufmgr *bufmgr, - const char *name, uint64_t size, - enum brw_memory_zone memzone) -{ - return bo_alloc_internal(bufmgr, name, size, memzone, - 0, I915_TILING_NONE, 0); -} - -struct brw_bo * -brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name, - uint64_t size, enum brw_memory_zone memzone, - uint32_t tiling_mode, uint32_t pitch, - unsigned flags) -{ - return bo_alloc_internal(bufmgr, name, size, memzone, - flags, tiling_mode, pitch); -} - -struct brw_bo * -brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name, - int x, int y, int cpp, enum brw_memory_zone memzone, - uint32_t tiling, uint32_t *pitch, unsigned flags) -{ - uint64_t size; - uint32_t stride; - unsigned long aligned_y, height_alignment; - - /* If we're tiled, our allocations are in 8 or 32-row blocks, - * so failure to align our height means that we won't allocate - * enough pages. 
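Stripped of the cache, the fresh-allocation path above is two ioctls. A minimal sketch of that sequence against the i915 uAPI used throughout this file (error handling trimmed; the set-domain call pre-faults pages, as the comment above explains; repo-relative drm-uapi include assumed):

   #include <stdint.h>
   #include <sys/ioctl.h>
   #include "drm-uapi/i915_drm.h"

   /* Create a GEM BO of `size` bytes and pre-fault its pages, mirroring
    * the GEM_CREATE + SET_DOMAIN sequence in bo_alloc_internal().
    * Returns the GEM handle, or 0 on failure.
    */
   static uint32_t
   gem_create_prefaulted(int fd, uint64_t size)
   {
      struct drm_i915_gem_create create = { .size = size };
      if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0)
         return 0;

      struct drm_i915_gem_set_domain sd = {
         .handle = create.handle,
         .read_domains = I915_GEM_DOMAIN_CPU,
      };
      if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
         return 0;   /* a real caller would GEM_CLOSE the handle here */

      return create.handle;
   }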
- * - * If we're untiled, we still have to align to 2 rows high - * because the data port accesses 2x2 blocks even if the - * bottom row isn't to be rendered, so failure to align means - * we could walk off the end of the GTT and fault. This is - * documented on 965, and may be the case on older chipsets - * too so we try to be careful. - */ - aligned_y = y; - height_alignment = 2; - - if (tiling == I915_TILING_X) - height_alignment = 8; - else if (tiling == I915_TILING_Y) - height_alignment = 32; - aligned_y = ALIGN(y, height_alignment); - - stride = x * cpp; - stride = bo_tile_pitch(bufmgr, stride, tiling); - size = stride * aligned_y; - size = bo_tile_size(bufmgr, size, tiling); - *pitch = stride; - - if (tiling == I915_TILING_NONE) - stride = 0; - - return bo_alloc_internal(bufmgr, name, size, memzone, - flags, tiling, stride); -} - -/** - * Returns a brw_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -struct brw_bo * -brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr, - const char *name, unsigned int handle) -{ - struct brw_bo *bo; - - /* At the moment most applications only have a few named bo. - * For instance, in a DRI client only the render buffers passed - * between X and the client are named. And since X returns the - * alternating names for the front/back buffer a linear search - * provides a sufficiently fast match. - */ - mtx_lock(&bufmgr->lock); - bo = hash_find_bo(bufmgr->name_table, handle); - if (bo) { - brw_bo_reference(bo); - goto out; - } - - struct drm_gem_open open_arg = { .name = handle }; - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); - if (ret != 0) { - DBG("Couldn't reference %s handle 0x%08x: %s\n", - name, handle, strerror(errno)); - bo = NULL; - goto out; - } - /* Now see if someone has used a prime handle to get this - * object from the kernel before by looking through the list - * again for a matching gem_handle - */ - bo = hash_find_bo(bufmgr->handle_table, open_arg.handle); - if (bo) { - brw_bo_reference(bo); - goto out; - } - - bo = bo_calloc(); - if (!bo) - goto out; - - p_atomic_set(&bo->refcount, 1); - - bo->size = open_arg.size; - bo->gtt_offset = 0; - bo->bufmgr = bufmgr; - bo->gem_handle = open_arg.handle; - bo->name = name; - bo->global_name = handle; - bo->reusable = false; - bo->external = true; - bo->kflags = bufmgr->initial_kflags; - - if (bo->kflags & EXEC_OBJECT_PINNED) - bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1); - - _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); - _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); - - struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; - ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) - goto err_unref; - - bo->tiling_mode = get_tiling.tiling_mode; - bo->swizzle_mode = get_tiling.swizzle_mode; - /* XXX stride is unknown */ - DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name); - -out: - mtx_unlock(&bufmgr->lock); - return bo; - -err_unref: - bo_free(bo); - mtx_unlock(&bufmgr->lock); - return NULL; -} - -static void -bo_free(struct brw_bo *bo) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - if (bo->map_cpu) { - VG_NOACCESS(bo->map_cpu, bo->size); - drm_munmap(bo->map_cpu, bo->size); - } - if (bo->map_wc) { - VG_NOACCESS(bo->map_wc, bo->size); - drm_munmap(bo->map_wc, bo->size); - } - if (bo->map_gtt) { - VG_NOACCESS(bo->map_gtt, bo->size); - drm_munmap(bo->map_gtt, 
bo->size); - } - - if (bo->external) { - struct hash_entry *entry; - - if (bo->global_name) { - entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name); - _mesa_hash_table_remove(bufmgr->name_table, entry); - } - - entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle); - _mesa_hash_table_remove(bufmgr->handle_table, entry); - } else { - assert(list_is_empty(&bo->exports)); - } - - /* Close this object */ - struct drm_gem_close close = { .handle = bo->gem_handle }; - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); - if (ret != 0) { - DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", - bo->gem_handle, bo->name, strerror(errno)); - } - - if (bo->kflags & EXEC_OBJECT_PINNED) - vma_free(bo->bufmgr, bo->gtt_offset, bo->size); - - free(bo); -} - -/** Frees all cached buffers significantly older than @time. */ -static void -cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time) -{ - int i; - - if (bufmgr->time == time) - return; - - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; - - list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) { - if (time - bo->free_time <= 1) - break; - - list_del(&bo->head); - - bo_free(bo); - } - } - - bufmgr->time = time; -} - -static void -bo_unreference_final(struct brw_bo *bo, time_t time) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - struct bo_cache_bucket *bucket; - - DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name); - - list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) { - struct drm_gem_close close = { .handle = export->gem_handle }; - intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close); - - list_del(&export->link); - free(export); - } - - bucket = bucket_for_size(bufmgr, bo->size); - /* Put the buffer into our internal cache for reuse if we can. */ - if (bufmgr->bo_reuse && bo->reusable && bucket != NULL && - brw_bo_madvise(bo, I915_MADV_DONTNEED)) { - bo->free_time = time; - - bo->name = NULL; - - list_addtail(&bo->head, &bucket->head); - } else { - bo_free(bo); - } -} - -void -brw_bo_unreference(struct brw_bo *bo) -{ - if (bo == NULL) - return; - - assert(p_atomic_read(&bo->refcount) > 0); - - if (atomic_add_unless(&bo->refcount, -1, 1)) { - struct brw_bufmgr *bufmgr = bo->bufmgr; - struct timespec time; - - clock_gettime(CLOCK_MONOTONIC, &time); - - mtx_lock(&bufmgr->lock); - - if (p_atomic_dec_zero(&bo->refcount)) { - bo_unreference_final(bo, time.tv_sec); - cleanup_bo_cache(bufmgr, time.tv_sec); - } - - mtx_unlock(&bufmgr->lock); - } -} - -static void -bo_wait_with_stall_warning(struct brw_context *brw, - struct brw_bo *bo, - const char *action) -{ - bool busy = brw && brw->perf_debug && !bo->idle; - double elapsed = unlikely(busy) ? 
-get_time() : 0.0; - - brw_bo_wait_rendering(bo); - - if (unlikely(busy)) { - elapsed += get_time(); - if (elapsed > 1e-5) /* 0.01ms */ - perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n", - action, bo->name, elapsed * 1000); - } -} - -static void -print_flags(unsigned flags) -{ - if (flags & MAP_READ) - DBG("READ "); - if (flags & MAP_WRITE) - DBG("WRITE "); - if (flags & MAP_ASYNC) - DBG("ASYNC "); - if (flags & MAP_PERSISTENT) - DBG("PERSISTENT "); - if (flags & MAP_COHERENT) - DBG("COHERENT "); - if (flags & MAP_RAW) - DBG("RAW "); - DBG("\n"); -} - -static void * -brw_bo_gem_mmap_legacy(struct brw_context *brw, struct brw_bo *bo, bool wc) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - struct drm_i915_gem_mmap mmap_arg = { - .handle = bo->gem_handle, - .size = bo->size, - .flags = wc ? I915_MMAP_WC : 0, - }; - - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); - if (ret != 0) { - DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", - __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); - return NULL; - } - void *map = (void *) (uintptr_t) mmap_arg.addr_ptr; - - return map; -} - -static void * -brw_bo_gem_mmap_offset(struct brw_context *brw, struct brw_bo *bo, bool wc) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - struct drm_i915_gem_mmap_offset mmap_arg = { - .handle = bo->gem_handle, - .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB, - }; - - /* Get the fake offset back */ - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg); - if (ret != 0) { - DBG("%s:%d: Error preparing buffer %d (%s): %s .\n", - __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); - return NULL; - } - - /* And map it */ - void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bufmgr->fd, mmap_arg.offset); - if (map == MAP_FAILED) { - DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", - __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); - return NULL; - } - - return map; -} - -static void * -brw_bo_gem_mmap(struct brw_context *brw, struct brw_bo *bo, bool wc) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - if (bufmgr->has_mmap_offset) - return brw_bo_gem_mmap_offset(brw, bo, wc); - else - return brw_bo_gem_mmap_legacy(brw, bo, wc); -} - -static void * -brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags) -{ - /* We disallow CPU maps for writing to non-coherent buffers, as the - * CPU map can become invalidated when a batch is flushed out, which - * can happen at unpredictable times. You should use WC maps instead. - */ - assert(bo->cache_coherent || !(flags & MAP_WRITE)); - - if (!bo->map_cpu) { - DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name); - - void *map = brw_bo_gem_mmap(brw, bo, false); - VG_DEFINED(map, bo->size); - - if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) { - VG_NOACCESS(map, bo->size); - drm_munmap(map, bo->size); - } - } - assert(bo->map_cpu); - - DBG("brw_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name, - bo->map_cpu); - print_flags(flags); - - if (!(flags & MAP_ASYNC)) { - bo_wait_with_stall_warning(brw, bo, "CPU mapping"); - } - - if (!bo->cache_coherent && !bo->bufmgr->has_llc) { - /* If we're reusing an existing CPU mapping, the CPU caches may - * contain stale data from the last time we read from that mapping. - * (With the BO cache, it might even be data from a previous buffer!) - * Even if it's a brand new mapping, the kernel may have zeroed the - * buffer via CPU writes. 
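 * (Illustrative sketch, not code from this file: on x86 an invalidate
 *  of this kind boils down to a CLFLUSH loop over the mapping plus a
 *  fence, roughly
 *
 *     for (char *p = map; p < (char *)map + size; p += 64)
 *        __builtin_ia32_clflush(p);   /* flush one cache line */
 *     __builtin_ia32_mfence();        /* order against later reads */
 *
 *  where map/size stand for the mapping and its length, and the
 *  64-byte cache line is an assumption; the intel_invalidate_range()
 *  call below is expected to expand to a sequence of this shape.)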
-       *
-       * We need to invalidate those cachelines so that we see the latest
-       * contents, and so long as we only read from the CPU mmap we do not
-       * need to write those cachelines back afterwards.
-       *
-       * On LLC, the empirical evidence suggests that writes from the GPU
-       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
-       * cachelines. (Other reads, such as the display engine, bypass the
-       * LLC entirely, requiring us to keep dirty pixels for the scanout
-       * out of any cache.)
-       */
-      intel_invalidate_range(bo->map_cpu, bo->size);
-   }
-
-   return bo->map_cpu;
-}
-
-static void *
-brw_bo_map_wc(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (!bufmgr->has_mmap_wc)
-      return NULL;
-
-   if (!bo->map_wc) {
-      DBG("brw_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
-      void *map = brw_bo_gem_mmap(brw, bo, true);
-      VG_DEFINED(map, bo->size);
-
-      if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
-         VG_NOACCESS(map, bo->size);
-         drm_munmap(map, bo->size);
-      }
-   }
-   assert(bo->map_wc);
-
-   DBG("brw_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
-   print_flags(flags);
-
-   if (!(flags & MAP_ASYNC)) {
-      bo_wait_with_stall_warning(brw, bo, "WC mapping");
-   }
-
-   return bo->map_wc;
-}
-
-/**
- * Perform an uncached mapping via the GTT.
- *
- * Write access through the GTT is not quite fully coherent. On low power
- * systems especially, like modern Atoms, we can observe reads from RAM before
- * the write via GTT has landed. A write memory barrier that flushes the Write
- * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
- * read after the write as the GTT write suffers a small delay through the GTT
- * indirection. The kernel uses an uncached mmio read to ensure the GTT write
- * is ordered with reads (either by the GPU, WB or WC) and unconditionally
- * flushes prior to execbuf submission. However, if we are not informing the
- * kernel about our GTT writes, it will not flush before earlier access, such
- * as when using the cmdparser. Similarly, we need to be careful if we should
- * ever issue a CPU read immediately following a GTT write.
- *
- * Telling the kernel about write access also has one more important
- * side-effect. Upon receiving notification about the write, it cancels any
- * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by
- * either SW_FINISH or DIRTYFB. The presumption is that we never write to the
- * actual scanout via a mmapping, only to a backbuffer, and so all the FBC/PSR
- * tracking is handled on the buffer exchange instead.
- */
-static void *
-brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* Get a mapping of the buffer if we haven't before. */
-   if (bo->map_gtt == NULL) {
-      DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
-
-      struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };
-
-      /* Get the fake offset back... */
-      int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
-      if (ret != 0) {
-         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
-             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
-         return NULL;
-      }
-
-      /* and mmap it. */
-      void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
-                           MAP_SHARED, bufmgr->fd, mmap_arg.offset);
-      if (map == MAP_FAILED) {
-         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
-             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
-         return NULL;
-      }
-
-      /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
-       * already intercept this mmap call. However, for consistency between
-       * all the mmap paths, we mark the pointer as defined now and mark it
-       * as inaccessible afterwards.
-       */
-      VG_DEFINED(map, bo->size);
-
-      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
-         VG_NOACCESS(map, bo->size);
-         drm_munmap(map, bo->size);
-      }
-   }
-   assert(bo->map_gtt);
-
-   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
-   print_flags(flags);
-
-   if (!(flags & MAP_ASYNC)) {
-      bo_wait_with_stall_warning(brw, bo, "GTT mapping");
-   }
-
-   return bo->map_gtt;
-}
-
-static bool
-can_map_cpu(struct brw_bo *bo, unsigned flags)
-{
-   if (bo->cache_coherent)
-      return true;
-
-   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
-    * an LLC platform reads are always coherent (as they are performed via
-    * the central system agent). It is just the writes that we need to take
-    * special care with, to ensure they land in main memory and do not stick
-    * in the CPU cache.
-    */
-   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
-      return true;
-
-   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
-    * across batch flushes where the kernel will change cache domains of the
-    * bo, invalidating continued access to the CPU mmap on non-LLC devices.
-    *
-    * Similarly, ASYNC typically means that the buffer will be accessed via
-    * both the CPU and the GPU simultaneously. Batches may be executed that
-    * use the BO even while it is mapped. While OpenGL technically disallows
-    * most drawing while non-persistent mappings are active, we may still use
-    * the GPU for blits or other operations, causing batches to happen at
-    * inconvenient times.
-    */
-   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
-      return false;
-
-   return !(flags & MAP_WRITE);
-}
-
-void *
-brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
-      return brw_bo_map_gtt(brw, bo, flags);
-
-   void *map;
-
-   if (can_map_cpu(bo, flags))
-      map = brw_bo_map_cpu(brw, bo, flags);
-   else
-      map = brw_bo_map_wc(brw, bo, flags);
-
-   /* Allow the attempt to fail by falling back to the GTT where necessary.
-    *
-    * Not every buffer can be mmapped directly using the CPU (or WC), for
-    * example buffers that wrap stolen memory or are imported from other
-    * devices. For those, we have little choice but to use a GTT mmapping.
-    * However, if we use a slow GTT mmapping for reads where we expected fast
-    * access, that order of magnitude difference in throughput will be clearly
-    * expressed by angry users.
-    *
-    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
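 * (Hedged usage sketch, with dst and size as hypothetical locals
 *  rather than anything defined in this driver:
 *
 *     void *ptr = brw_bo_map(brw, bo, MAP_READ);
 *     if (ptr) {
 *        memcpy(dst, ptr, size);   /* read back the buffer contents */
 *        brw_bo_unmap(bo);
 *     }
 *
 *  The CPU, WC or GTT path is chosen internally as described above,
 *  so callers never pick a mapping type themselves.)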
-    */
-   if (!map && !(flags & MAP_RAW)) {
-      if (brw) {
-         perf_debug("Fallback GTT mapping for %s with access flags %x\n",
-                    bo->name, flags);
-      }
-      map = brw_bo_map_gtt(brw, bo, flags);
-   }
-
-   return map;
-}
-
-int
-brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
-               uint64_t size, const void *data)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   struct drm_i915_gem_pwrite pwrite = {
-      .handle = bo->gem_handle,
-      .offset = offset,
-      .size = size,
-      .data_ptr = (uint64_t) (uintptr_t) data,
-   };
-
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
-   if (ret != 0) {
-      ret = -errno;
-      DBG("%s:%d: Error writing data to buffer %d: "
-          "(%"PRIu64" %"PRIu64") %s .\n",
-          __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
-   }
-
-   return ret;
-}
-
-/** Waits for all GPU rendering with the object to have completed. */
-void
-brw_bo_wait_rendering(struct brw_bo *bo)
-{
-   /* We require a kernel recent enough for WAIT_IOCTL support.
-    * See brw_init_bufmgr()
-    */
-   brw_bo_wait(bo, -1);
-}
-
-/**
- * Waits on a BO for the given amount of time.
- *
- * @bo: buffer object to wait for
- * @timeout_ns: amount of time to wait in nanoseconds.
- *   If the value is less than 0, an infinite wait will occur.
- *
- * Returns 0 if the wait was successful, i.e. the last batch referencing the
- * object has completed within the allotted time. Otherwise some negative
- * return value describes the error. Of particular interest is -ETIME when
- * the wait has failed to yield the desired result.
- *
- * Similar to brw_bo_wait_rendering except a timeout parameter allows
- * the operation to give up after a certain amount of time. Another subtle
- * difference is that the internal locking semantics differ (this variant
- * does not hold the lock for the duration of the wait). This makes the wait
- * subject to a larger userspace race window.
- *
- * The implementation shall wait until the object is no longer actively
- * referenced within a batch buffer at the time of the call. The wait will
- * not guarantee that the buffer is re-issued via another thread, or a
- * flinked handle. Userspace must make sure this race does not occur if such
- * precision is important.
- *
- * Note that some kernels have broken the infinite wait for negative values
- * promise; upgrade to the latest stable kernels if this is the case.
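 * (Illustrative caller, an assumption rather than code from this
 *  driver: wait up to 1 ms and treat -ETIME as "still busy":
 *
 *     int ret = brw_bo_wait(bo, 1000000);
 *     if (ret == -ETIME) {
 *        // object is still referenced by a batch; back off and retry
 *     } else if (ret < 0) {
 *        // a real error from the wait ioctl
 *     }
 *
 *  A negative timeout_ns, as passed by brw_bo_wait_rendering(),
 *  blocks until the object is idle.)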
- */
-int
-brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* If we know it's idle, don't bother with the kernel round trip */
-   if (bo->idle && !bo->external)
-      return 0;
-
-   struct drm_i915_gem_wait wait = {
-      .bo_handle = bo->gem_handle,
-      .timeout_ns = timeout_ns,
-   };
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
-   if (ret != 0)
-      return -errno;
-
-   bo->idle = true;
-
-   return ret;
-}
-
-void
-brw_bufmgr_unref(struct brw_bufmgr *bufmgr)
-{
-   mtx_lock(&global_bufmgr_list_mutex);
-   if (p_atomic_dec_zero(&bufmgr->refcount)) {
-      list_del(&bufmgr->link);
-   } else {
-      bufmgr = NULL;
-   }
-   mtx_unlock(&global_bufmgr_list_mutex);
-
-   if (!bufmgr)
-      return;
-
-   mtx_destroy(&bufmgr->lock);
-
-   /* Free any cached buffer objects we were going to reuse */
-   for (int i = 0; i < bufmgr->num_buckets; i++) {
-      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
-
-      list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
-         list_del(&bo->head);
-
-         bo_free(bo);
-      }
-
-      if (brw_using_softpin(bufmgr)) {
-         for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
-            util_dynarray_fini(&bucket->vma_list[z]);
-         }
-      }
-   }
-
-   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
-   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
-
-   if (brw_using_softpin(bufmgr)) {
-      for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
-         util_vma_heap_finish(&bufmgr->vma_allocator[z]);
-      }
-   }
-
-   close(bufmgr->fd);
-   bufmgr->fd = -1;
-
-   free(bufmgr);
-}
-
-static int
-bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
-                       uint32_t stride)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-   struct drm_i915_gem_set_tiling set_tiling;
-   int ret;
-
-   if (bo->global_name == 0 &&
-       tiling_mode == bo->tiling_mode && stride == bo->stride)
-      return 0;
-
-   memset(&set_tiling, 0, sizeof(set_tiling));
-   do {
-      /* set_tiling is slightly broken and overwrites the
-       * input on the error path, so we have to open code
-       * drmIoctl.
-       */
-      set_tiling.handle = bo->gem_handle;
-      set_tiling.tiling_mode = tiling_mode;
-      set_tiling.stride = stride;
-
-      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
-   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
-   if (ret == -1)
-      return -errno;
-
-   bo->tiling_mode = set_tiling.tiling_mode;
-   bo->swizzle_mode = set_tiling.swizzle_mode;
-   bo->stride = set_tiling.stride;
-   return 0;
-}
-
-int
-brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
-                  uint32_t *swizzle_mode)
-{
-   *tiling_mode = bo->tiling_mode;
-   *swizzle_mode = bo->swizzle_mode;
-   return 0;
-}
-
-static struct brw_bo *
-brw_bo_gem_create_from_prime_internal(struct brw_bufmgr *bufmgr, int prime_fd,
-                                      int tiling_mode, uint32_t stride)
-{
-   uint32_t handle;
-   struct brw_bo *bo;
-
-   mtx_lock(&bufmgr->lock);
-   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
-   if (ret) {
-      DBG("create_from_prime: failed to obtain handle from fd: %s\n",
-          strerror(errno));
-      mtx_unlock(&bufmgr->lock);
-      return NULL;
-   }
-
-   /*
-    * See if the kernel has already returned this buffer to us. Just as
-    * for named buffers, we must not create two bos pointing at the same
-    * kernel object.
-    */
-   bo = hash_find_bo(bufmgr->handle_table, handle);
-   if (bo) {
-      brw_bo_reference(bo);
-      goto out;
-   }
-
-   bo = bo_calloc();
-   if (!bo)
-      goto out;
-
-   p_atomic_set(&bo->refcount, 1);
-
-   /* Determine the size of the bo. The fd-to-handle ioctl really should
-    * return the size, but it doesn't. If we have kernel 3.12 or
-    * later, we can lseek on the prime fd to get the size. Older
-    * kernels will just fail, in which case we fall back to the
-    * provided (estimated or guessed) size. */
-   ret = lseek(prime_fd, 0, SEEK_END);
-   if (ret != -1)
-      bo->size = ret;
-
-   bo->bufmgr = bufmgr;
-
-   bo->gem_handle = handle;
-   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
-
-   bo->name = "prime";
-   bo->reusable = false;
-   bo->external = true;
-   bo->kflags = bufmgr->initial_kflags;
-
-   if (bo->kflags & EXEC_OBJECT_PINNED) {
-      assert(bo->size > 0);
-      bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
-   }
-
-   if (tiling_mode < 0) {
-      struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
-      if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
-         goto err;
-
-      bo->tiling_mode = get_tiling.tiling_mode;
-      bo->swizzle_mode = get_tiling.swizzle_mode;
-      /* XXX stride is unknown */
-   } else {
-      bo_set_tiling_internal(bo, tiling_mode, stride);
-   }
-
-out:
-   mtx_unlock(&bufmgr->lock);
-   return bo;
-
-err:
-   bo_free(bo);
-   mtx_unlock(&bufmgr->lock);
-   return NULL;
-}
-
-struct brw_bo *
-brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
-{
-   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd, -1, 0);
-}
-
-struct brw_bo *
-brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, int prime_fd,
-                                   uint32_t tiling_mode, uint32_t stride)
-{
-   assert(tiling_mode == I915_TILING_NONE ||
-          tiling_mode == I915_TILING_X ||
-          tiling_mode == I915_TILING_Y);
-
-   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd,
-                                                tiling_mode, stride);
-}
-
-static void
-brw_bo_make_external(struct brw_bo *bo)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (!bo->external) {
-      mtx_lock(&bufmgr->lock);
-      if (!bo->external) {
-         _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
-         bo->external = true;
-      }
-      mtx_unlock(&bufmgr->lock);
-   }
-}
-
-int
-brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   brw_bo_make_external(bo);
-
-   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
-                          DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
-      return -errno;
-
-   bo->reusable = false;
-
-   return 0;
-}
-
-uint32_t
-brw_bo_export_gem_handle(struct brw_bo *bo)
-{
-   brw_bo_make_external(bo);
-
-   return bo->gem_handle;
-}
-
-int
-brw_bo_flink(struct brw_bo *bo, uint32_t *name)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (!bo->global_name) {
-      struct drm_gem_flink flink = { .handle = bo->gem_handle };
-
-      if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
-         return -errno;
-
-      brw_bo_make_external(bo);
-      mtx_lock(&bufmgr->lock);
-      if (!bo->global_name) {
-         bo->global_name = flink.name;
-         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
-      }
-      mtx_unlock(&bufmgr->lock);
-
-      bo->reusable = false;
-   }
-
-   *name = bo->global_name;
-   return 0;
-}
-
-int
-brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
-                                    uint32_t *out_handle)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* Only add the new GEM handle to the list of exports if it belongs to a
-    * different GEM device. Otherwise we might close the same buffer multiple
-    * times.
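 * (Sketch of the failure mode being avoided, assuming two exports of
 *  the same bo to one foreign fd:
 *
 *     uint32_t h1, h2;
 *     brw_bo_export_gem_handle_for_device(bo, fd, &h1);
 *     brw_bo_export_gem_handle_for_device(bo, fd, &h2);
 *     assert(h1 == h2);   // deduplicated through bo->exports
 *
 *  The kernel returns the same GEM handle for both imports, so
 *  without the bo->exports bookkeeping the teardown path would
 *  GEM_CLOSE that one handle twice.)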
- */ - int ret = os_same_file_description(drm_fd, bufmgr->fd); - WARN_ONCE(ret < 0, - "Kernel has no file descriptor comparison support: %s\n", - strerror(errno)); - if (ret == 0) { - *out_handle = brw_bo_export_gem_handle(bo); - return 0; - } - - struct bo_export *export = calloc(1, sizeof(*export)); - if (!export) - return -ENOMEM; - - export->drm_fd = drm_fd; - - int dmabuf_fd = -1; - int err = brw_bo_gem_export_to_prime(bo, &dmabuf_fd); - if (err) { - free(export); - return err; - } - - mtx_lock(&bufmgr->lock); - err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle); - close(dmabuf_fd); - if (err) { - mtx_unlock(&bufmgr->lock); - free(export); - return err; - } - - bool found = false; - list_for_each_entry(struct bo_export, iter, &bo->exports, link) { - if (iter->drm_fd != drm_fd) - continue; - /* Here we assume that for a given DRM fd, we'll always get back the - * same GEM handle for a given buffer. - */ - assert(iter->gem_handle == export->gem_handle); - free(export); - export = iter; - found = true; - break; - } - if (!found) - list_addtail(&export->link, &bo->exports); - - mtx_unlock(&bufmgr->lock); - - *out_handle = export->gem_handle; - - return 0; -} - -static void -add_bucket(struct brw_bufmgr *bufmgr, int size) -{ - unsigned int i = bufmgr->num_buckets; - - assert(i < ARRAY_SIZE(bufmgr->cache_bucket)); - - list_inithead(&bufmgr->cache_bucket[i].head); - if (brw_using_softpin(bufmgr)) { - for (int z = 0; z < BRW_MEMZONE_COUNT; z++) - util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[z], NULL); - } - bufmgr->cache_bucket[i].size = size; - bufmgr->num_buckets++; - - assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); - assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); - assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); -} - -static void -init_cache_buckets(struct brw_bufmgr *bufmgr) -{ - uint64_t size, cache_max_size = 64 * 1024 * 1024; - - /* OK, so power of two buckets was too wasteful of memory. - * Give 3 other sizes between each power of two, to hopefully - * cover things accurately enough. (The alternative is - * probably to just go for exact matching of sizes, and assume - * that for things like composited window resize the tiled - * width/height alignment and rounding of sizes to pages will - * get us useful cache hit rates anyway) - */ - add_bucket(bufmgr, PAGE_SIZE); - add_bucket(bufmgr, PAGE_SIZE * 2); - add_bucket(bufmgr, PAGE_SIZE * 3); - - /* Initialize the linked lists for BO reuse cache. 
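 * (Worked example, derived from the code below rather than stated in
 *  it: with 4 KiB pages the buckets come out as 4, 8, 12, 16, 20, 24,
 *  28, 32, 40, 48, 56, 64 KiB and so on, i.e. each power of two plus
 *  three evenly spaced steps toward the next, up to cache_max_size
 *  (64 MiB). bucket_for_size(), defined earlier in this file, should
 *  then behave like the obvious linear scan:
 *
 *     for (int i = 0; i < bufmgr->num_buckets; i++)
 *        if (bufmgr->cache_bucket[i].size >= size)
 *           return &bufmgr->cache_bucket[i];
 *     return NULL;
 *  )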
    */
-   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
-      add_bucket(bufmgr, size);
-
-      add_bucket(bufmgr, size + size * 1 / 4);
-      add_bucket(bufmgr, size + size * 2 / 4);
-      add_bucket(bufmgr, size + size * 3 / 4);
-   }
-}
-
-uint32_t
-brw_create_hw_context(struct brw_bufmgr *bufmgr)
-{
-   struct drm_i915_gem_context_create create = { };
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
-   if (ret != 0) {
-      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
-      return 0;
-   }
-
-   return create.ctx_id;
-}
-
-int
-brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
-                            uint32_t ctx_id,
-                            int priority)
-{
-   struct drm_i915_gem_context_param p = {
-      .ctx_id = ctx_id,
-      .param = I915_CONTEXT_PARAM_PRIORITY,
-      .value = priority,
-   };
-   int err;
-
-   err = 0;
-   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
-      err = -errno;
-
-   return err;
-}
-
-void
-brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
-{
-   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
-
-   if (ctx_id != 0 &&
-       drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
-      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
-              strerror(errno));
-   }
-}
-
-int
-brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
-{
-   struct drm_i915_reg_read reg_read = { .offset = offset };
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
-
-   *result = reg_read.val;
-   return ret;
-}
-
-static int
-gem_param(int fd, int name)
-{
-   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
-
-   struct drm_i915_getparam gp = { .param = name, .value = &v };
-   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
-      return -1;
-
-   return v;
-}
-
-static int
-gem_context_getparam(int fd, uint32_t context, uint64_t param, uint64_t *value)
-{
-   struct drm_i915_gem_context_param gp = {
-      .ctx_id = context,
-      .param = param,
-   };
-
-   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp))
-      return -1;
-
-   *value = gp.value;
-
-   return 0;
-}
-
-bool
-brw_using_softpin(struct brw_bufmgr *bufmgr)
-{
-   return bufmgr->initial_kflags & EXEC_OBJECT_PINNED;
-}
-
-static struct brw_bufmgr *
-brw_bufmgr_ref(struct brw_bufmgr *bufmgr)
-{
-   p_atomic_inc(&bufmgr->refcount);
-   return bufmgr;
-}
-
-/**
- * Initializes the GEM buffer manager, which uses the kernel to allocate,
- * map, and manage buffer objects.
- *
- * \param fd File descriptor of the opened DRM device.
- */
-static struct brw_bufmgr *
-brw_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
-{
-   struct brw_bufmgr *bufmgr;
-
-   bufmgr = calloc(1, sizeof(*bufmgr));
-   if (bufmgr == NULL)
-      return NULL;
-
-   /* Handles to buffer objects belong to the device fd and are not
-    * reference counted by the kernel. If the same fd is used by
-    * multiple parties (threads sharing the same screen bufmgr, or
-    * even worse the same device fd passed to multiple libraries)
-    * ownership of those handles is shared by those independent parties.
-    *
-    * Don't do this! Ensure that each library/bufmgr has its own device
-    * fd so that its namespace does not clash with another.
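 * (Clarifying note, based on general GEM semantics rather than on
 *  anything in this file: handles are scoped to the open file
 *  description, so the os_dupfd_cloexec() below does not create a
 *  fresh handle namespace; it only decouples the bufmgr's fd lifetime
 *  from the caller's. A truly independent namespace requires a
 *  separate open() of the device node, which is what the warning
 *  above is asking callers to guarantee.)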
-    */
-   bufmgr->fd = os_dupfd_cloexec(fd);
-   if (bufmgr->fd < 0) {
-      free(bufmgr);
-      return NULL;
-   }
-
-   p_atomic_set(&bufmgr->refcount, 1);
-
-   if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
-      close(bufmgr->fd);
-      free(bufmgr);
-      return NULL;
-   }
-
-   uint64_t gtt_size;
-   if (gem_context_getparam(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE, &gtt_size))
-      gtt_size = 0;
-
-   bufmgr->has_llc = devinfo->has_llc;
-   bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
-   bufmgr->bo_reuse = bo_reuse;
-   bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
-
-   const uint64_t _4GB = 4ull << 30;
-
-   /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
-   const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
-
-   if (devinfo->ver >= 8 && gtt_size > _4GB) {
-      bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
-      /* Allocate VMA in userspace if we have softpin and full PPGTT. */
-      if (gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0 &&
-          gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1) {
-         bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
-
-         util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G],
-                            PAGE_SIZE, _4GB_minus_1);
-
-         /* Leave the last 4GB out of the high vma range, so that no state
-          * base address + size can overflow 48 bits.
-          */
-         util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
-                            1 * _4GB, gtt_size - 2 * _4GB);
-      } else if (devinfo->ver >= 10) {
-         /* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
-          * kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
-          * 4.14. Gfx10+ GVT hasn't landed yet, so it's not actually a
-          * problem - but extending this requirement back to earlier gens
-          * might actually mean requiring 4.14.
-          */
-         fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gfx10+.\n");
-         close(bufmgr->fd);
-         free(bufmgr);
-         return NULL;
-      }
-   }
-
-   init_cache_buckets(bufmgr);
-
-   bufmgr->name_table =
-      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
-   bufmgr->handle_table =
-      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
-
-   return bufmgr;
-}
-
-struct brw_bufmgr *
-brw_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
-{
-   struct stat st;
-
-   if (fstat(fd, &st))
-      return NULL;
-
-   struct brw_bufmgr *bufmgr = NULL;
-
-   mtx_lock(&global_bufmgr_list_mutex);
-   list_for_each_entry(struct brw_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
-      struct stat iter_st;
-      if (fstat(iter_bufmgr->fd, &iter_st))
-         continue;
-
-      if (st.st_rdev == iter_st.st_rdev) {
-         assert(iter_bufmgr->bo_reuse == bo_reuse);
-         bufmgr = brw_bufmgr_ref(iter_bufmgr);
-         goto unlock;
-      }
-   }
-
-   bufmgr = brw_bufmgr_create(devinfo, fd, bo_reuse);
-   if (bufmgr)
-      list_addtail(&bufmgr->link, &global_bufmgr_list);
-
- unlock:
-   mtx_unlock(&global_bufmgr_list_mutex);
-
-   return bufmgr;
-}
-
-int
-brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr)
-{
-   return bufmgr->fd;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
deleted file mode 100644
index cb272a9..0000000
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and
to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** - * @file brw_bufmgr.h - * - * Public definitions of Intel-specific bufmgr functions. - */ - -#ifndef BRW_BUFMGR_H -#define BRW_BUFMGR_H - -#include -#include -#include -#include - -#include "c11/threads.h" -#include "util/u_atomic.h" -#include "util/list.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -struct intel_device_info; -struct brw_context; - -/** - * Memory zones. When allocating a buffer, you can request that it is - * placed into a specific region of the virtual address space (PPGTT). - * - * Most buffers can go anywhere (BRW_MEMZONE_OTHER). Some buffers are - * accessed via an offset from a base address. STATE_BASE_ADDRESS has - * a maximum 4GB size for each region, so we need to restrict those - * buffers to be within 4GB of the base. Each memory zone corresponds - * to a particular base address. - * - * Currently, i965 partitions the address space into two regions: - * - * - Low 4GB - * - Full 48-bit address space - * - * Eventually, we hope to carve out 4GB of VMA for each base address. - */ -enum brw_memory_zone { - BRW_MEMZONE_LOW_4G, - BRW_MEMZONE_OTHER, - - /* Shaders - Instruction State Base Address */ - BRW_MEMZONE_SHADER = BRW_MEMZONE_LOW_4G, - - /* Scratch - General State Base Address */ - BRW_MEMZONE_SCRATCH = BRW_MEMZONE_LOW_4G, - - /* Surface State Base Address */ - BRW_MEMZONE_SURFACE = BRW_MEMZONE_LOW_4G, - - /* Dynamic State Base Address */ - BRW_MEMZONE_DYNAMIC = BRW_MEMZONE_LOW_4G, -}; - -#define BRW_MEMZONE_COUNT (BRW_MEMZONE_OTHER + 1) - -struct brw_bo { - /** - * Size in bytes of the buffer object. - * - * The size may be larger than the size originally requested for the - * allocation, such as being aligned to page size. - */ - uint64_t size; - - /** Buffer manager context associated with this buffer object */ - struct brw_bufmgr *bufmgr; - - /** The GEM handle for this buffer object. */ - uint32_t gem_handle; - - /** - * Offset of the buffer inside the Graphics Translation Table. - * - * This is effectively our GPU address for the buffer and we use it - * as our base for all state pointers into the buffer. However, since the - * kernel may be forced to move it around during the course of the - * buffer's lifetime, we can only know where the buffer was on the last - * execbuf. We presume, and are usually right, that the buffer will not - * move and so we use that last offset for the next batch and by doing - * so we can avoid having the kernel perform a relocation fixup pass as - * our pointers inside the batch will be using the correct base offset. 
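 * (Sketch of how this presumption was consumed on the pre-softpin
 *  relocation path; the field usage follows the i915 uAPI, while the
 *  surrounding variables are hypothetical:
 *
 *     struct drm_i915_gem_relocation_entry reloc = {
 *        .target_handle = target->gem_handle,
 *        .offset = batch_offset,        // where the pointer lives
 *        .delta = delta,                // offset inside the target
 *        .presumed_offset = target->gtt_offset,
 *     };
 *     // the batch dword is written as target->gtt_offset + delta;
 *     // the kernel only rewrites it if presumed_offset proved wrong.
 *  )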
- *
- * Since we do use it as a base address for the next batch of pointers,
- * the kernel treats our offset as a request, and if possible will
- * arrange for the buffer to be placed at that address (trying to balance
- * the cost of buffer migration versus the cost of performing
- * relocations). Furthermore, by specifying EXEC_OBJECT_PINNED we can force
- * the kernel to place the buffer at our chosen offset, or to report a
- * failure if we specified a conflicting offset.
- *
- * Note the GTT may be either per context, or shared globally across the
- * system. On a shared system, our buffers have to contend for address
- * space with both aperture mappings and framebuffers and so are more
- * likely to be moved. On a full ppGTT system, each batch exists in its
- * own GTT, and so each buffer may have its own offset within each
- * context.
- */
- uint64_t gtt_offset;
-
- /**
- * The validation list index for this buffer, or -1 when not in a batch.
- * Note that a single buffer may be in multiple batches (contexts), and
- * this is a global field, which refers to the last batch using the BO.
- * It should not be considered authoritative, but can be used to avoid a
- * linear walk of the validation list in the common case by guessing that
- * exec_bos[bo->index] == bo and confirming whether that's the case.
- */
- unsigned index;
-
- /**
- * Boolean of whether the GPU is definitely not accessing the buffer.
- *
- * This is only valid when reusable, since non-reusable
- * buffers are those that have been shared with other
- * processes, so we don't know their state.
- */
- bool idle;
-
- int refcount;
- const char *name;
-
- uint64_t kflags;
-
- /**
- * Kernel-assigned global name for this object.
- *
- * The list contains both flink-named and prime-fd'd objects.
- */
- unsigned int global_name;
-
- /**
- * Current tiling mode
- */
- uint32_t tiling_mode;
- uint32_t swizzle_mode;
- uint32_t stride;
-
- time_t free_time;
-
- /** Mapped address for the buffer, saved across map/unmap cycles */
- void *map_cpu;
- /** GTT virtual address for the buffer, saved across map/unmap cycles */
- void *map_gtt;
- /** WC CPU address for the buffer, saved across map/unmap cycles */
- void *map_wc;
-
- /** BO cache list */
- struct list_head head;
-
- /**
- * List of GEM handle exports of this buffer (bo_export).
- *
- * Hold bufmgr->lock when using this list.
- */
- struct list_head exports;
-
- /**
- * Boolean of whether this buffer can be re-used
- */
- bool reusable;
-
- /**
- * Boolean of whether this buffer has been shared with an external client.
- */
- bool external;
-
- /**
- * Boolean of whether this buffer is cache coherent
- */
- bool cache_coherent;
-};
-
-#define BO_ALLOC_BUSY (1<<0)
-#define BO_ALLOC_ZEROED (1<<1)
-
-/**
- * Allocate a buffer object.
- *
- * Buffer objects are not necessarily initially mapped into CPU virtual
- * address space or graphics device aperture. They must be mapped
- * using brw_bo_map() to be used by the CPU.
- */
-struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
- uint64_t size, enum brw_memory_zone memzone);
-
-/**
- * Allocate a tiled buffer object.
- *
- * Alignment for tiled objects is set automatically; the 'flags'
- * argument provides a hint about how the object will be used initially.
- * - * Valid tiling formats are: - * I915_TILING_NONE - * I915_TILING_X - * I915_TILING_Y - */ -struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, - const char *name, - uint64_t size, - enum brw_memory_zone memzone, - uint32_t tiling_mode, - uint32_t pitch, - unsigned flags); - -/** - * Allocate a tiled buffer object. - * - * Alignment for tiled objects is set automatically; the 'flags' - * argument provides a hint about how the object will be used initially. - * - * Valid tiling formats are: - * I915_TILING_NONE - * I915_TILING_X - * I915_TILING_Y - * - * Note the tiling format may be rejected; callers should check the - * 'tiling_mode' field on return, as well as the pitch value, which - * may have been rounded up to accommodate for tiling restrictions. - */ -struct brw_bo *brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, - const char *name, - int x, int y, int cpp, - enum brw_memory_zone memzone, - uint32_t tiling_mode, - uint32_t *pitch, - unsigned flags); - -/** Takes a reference on a buffer object */ -static inline void -brw_bo_reference(struct brw_bo *bo) -{ - p_atomic_inc(&bo->refcount); -} - -/** - * Releases a reference on a buffer object, freeing the data if - * no references remain. - */ -void brw_bo_unreference(struct brw_bo *bo); - -/* Must match MapBufferRange interface (for convenience) */ -#define MAP_READ GL_MAP_READ_BIT -#define MAP_WRITE GL_MAP_WRITE_BIT -#define MAP_ASYNC GL_MAP_UNSYNCHRONIZED_BIT -#define MAP_PERSISTENT GL_MAP_PERSISTENT_BIT -#define MAP_COHERENT GL_MAP_COHERENT_BIT -/* internal */ -#define MAP_INTERNAL_MASK (0xffu << 24) -#define MAP_RAW (0x01 << 24) - -/** - * Maps the buffer into userspace. - * - * This function will block waiting for any existing execution on the - * buffer to complete, first. The resulting mapping is returned. - */ -MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags); - -/** - * Reduces the refcount on the userspace mapping of the buffer - * object. - */ -static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; } - -/** Write data into an object. */ -int brw_bo_subdata(struct brw_bo *bo, uint64_t offset, - uint64_t size, const void *data); -/** - * Waits for rendering to an object by the GPU to have completed. - * - * This is not required for any access to the BO by bo_map, - * bo_subdata, etc. It is merely a way for the driver to implement - * glFinish. - */ -void brw_bo_wait_rendering(struct brw_bo *bo); - -/** - * Unref a buffer manager instance. - */ -void brw_bufmgr_unref(struct brw_bufmgr *bufmgr); - -/** - * Get the current tiling (and resulting swizzling) mode for the bo. - * - * \param buf Buffer to get tiling mode for - * \param tiling_mode returned tiling mode - * \param swizzle_mode returned swizzling mode - */ -int brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode, - uint32_t *swizzle_mode); - -/** - * Create a visible name for a buffer which can be used by other apps - * - * \param buf Buffer to create a name for - * \param name Returned name - */ -int brw_bo_flink(struct brw_bo *bo, uint32_t *name); - -/** - * Returns 1 if mapping the buffer for write could cause the process - * to block, due to the object being active in the GPU. - */ -int brw_bo_busy(struct brw_bo *bo); - -/** - * Specify the volatility of the buffer. - * \param bo Buffer to create a name for - * \param madv The purgeable status - * - * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be - * reclaimed under memory pressure. 
If you subsequently require the buffer, - * then you must pass I915_MADV_WILLNEED to mark the buffer as required. - * - * Returns 1 if the buffer was retained, or 0 if it was discarded whilst - * marked as I915_MADV_DONTNEED. - */ -int brw_bo_madvise(struct brw_bo *bo, int madv); - -struct brw_bufmgr *brw_bufmgr_get_for_fd(struct intel_device_info *devinfo, - int fd, bool bo_reuse); - -struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr, - const char *name, - unsigned int handle); - -int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns); - -uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr); - -int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr, - uint32_t ctx_id, - int priority); - -void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id); - -int brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr); - -int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd); -struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, - int prime_fd); -struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, - int prime_fd, - uint32_t tiling_mode, - uint32_t stride); - -uint32_t brw_bo_export_gem_handle(struct brw_bo *bo); - -/** - * Exports a bo as a GEM handle into a given DRM file descriptor - * \param bo Buffer to export - * \param drm_fd File descriptor where the new handle is created - * \param out_handle Pointer to store the new handle - * - * Returns 0 if the buffer was successfully exported, a non zero error code - * otherwise. - */ -int brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd, - uint32_t *out_handle); - -int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, - uint64_t *result); - -bool brw_using_softpin(struct brw_bufmgr *bufmgr); - -/** @{ */ - -#if defined(__cplusplus) -} -#endif -#endif /* BRW_BUFMGR_H */ diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c deleted file mode 100644 index 3fcc31a..0000000 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * Copyright 2009, 2012 Intel Corporation. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/mtypes.h" -#include "main/condrender.h" -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_batch.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_blorp.h" -#include "brw_defines.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -static const char *buffer_names[] = { - [BUFFER_FRONT_LEFT] = "front", - [BUFFER_BACK_LEFT] = "back", - [BUFFER_FRONT_RIGHT] = "front right", - [BUFFER_BACK_RIGHT] = "back right", - [BUFFER_DEPTH] = "depth", - [BUFFER_STENCIL] = "stencil", - [BUFFER_ACCUM] = "accum", - [BUFFER_COLOR0] = "color0", - [BUFFER_COLOR1] = "color1", - [BUFFER_COLOR2] = "color2", - [BUFFER_COLOR3] = "color3", - [BUFFER_COLOR4] = "color4", - [BUFFER_COLOR5] = "color5", - [BUFFER_COLOR6] = "color6", - [BUFFER_COLOR7] = "color7", -}; - -static void -debug_mask(const char *name, GLbitfield mask) -{ - GLuint i; - - if (INTEL_DEBUG(DEBUG_BLIT)) { - DBG("%s clear:", name); - for (i = 0; i < BUFFER_COUNT; i++) { - if (mask & (1 << i)) - DBG(" %s", buffer_names[i]); - } - DBG("\n"); - } -} - -/** - * Returns true if the scissor is a noop (cuts out nothing). - */ -static bool -noop_scissor(struct gl_framebuffer *fb) -{ - return fb->_Xmin <= 0 && - fb->_Ymin <= 0 && - fb->_Xmax >= fb->Width && - fb->_Ymax >= fb->Height; -} - -/** - * Implements fast depth clears on gfx6+. - * - * Fast clears basically work by setting a flag in each of the subspans - * represented in the HiZ buffer that says "When you need the depth values for - * this subspan, it's the hardware's current clear value." Then later rendering - * can just use the static clear value instead of referencing memory. - * - * The tricky part of the implementation is that you have to have the clear - * value that was used on the depth buffer in place for all further rendering, - * at least until a resolve to the real depth buffer happens. - */ -static bool -brw_fast_clear_depth(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_mipmap_tree *mt = depth_irb->mt; - struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR)) - return false; - - if (devinfo->ver < 6) - return false; - - if (!brw_renderbuffer_has_hiz(depth_irb)) - return false; - - /* We only handle full buffer clears -- otherwise you'd have to track whether - * a previous clear had happened at a different clear value and resolve it - * first. - */ - if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) { - perf_debug("Failed to fast clear %dx%d depth because of scissors. " - "Possible 5%% performance win if avoided.\n", - mt->surf.logical_level0_px.width, - mt->surf.logical_level0_px.height); - return false; - } - - switch (mt->format) { - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - case MESA_FORMAT_Z24_UNORM_S8_UINT: - /* From the Sandy Bridge PRM, volume 2 part 1, page 314: - * - * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be - * enabled (the legacy method of clearing must be performed): - * - * - If the depth buffer format is D32_FLOAT_S8X24_UINT or - * D24_UNORM_S8_UINT. 
- */ - return false; - - case MESA_FORMAT_Z_UNORM16: - /* From the Sandy Bridge PRM, volume 2 part 1, page 314: - * - * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be - * enabled (the legacy method of clearing must be performed): - * - * - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the - * width of the map (LOD0) is not multiple of 16, fast clear - * optimization must be disabled. - */ - if (devinfo->ver == 6 && - (minify(mt->surf.phys_level0_sa.width, - depth_irb->mt_level - mt->first_level) % 16) != 0) - return false; - break; - - default: - break; - } - - /* Quantize the clear value to what can be stored in the actual depth - * buffer. This makes the following check more accurate because it now - * checks if the actual depth bits will match. It also prevents us from - * getting a too-accurate depth value during depth testing or when sampling - * with HiZ enabled. - */ - float clear_value = - mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear : - _mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax); - - const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1; - - /* If we're clearing to a new clear value, then we need to resolve any clear - * flags out of the HiZ buffer into the real depth buffer. - */ - if (mt->fast_clear_color.f32[0] != clear_value) { - for (uint32_t level = mt->first_level; level <= mt->last_level; level++) { - if (!brw_miptree_level_has_hiz(mt, level)) - continue; - - const unsigned level_layers = brw_get_num_logical_layers(mt, level); - - for (uint32_t layer = 0; layer < level_layers; layer++) { - if (level == depth_irb->mt_level && - layer >= depth_irb->mt_layer && - layer < depth_irb->mt_layer + num_layers) { - /* We're going to clear this layer anyway. Leave it alone. */ - continue; - } - - enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, layer); - - if (aux_state != ISL_AUX_STATE_CLEAR && - aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) { - /* This slice doesn't have any fast-cleared bits. */ - continue; - } - - /* If we got here, then the level may have fast-clear bits that - * use the old clear value. We need to do a depth resolve to get - * rid of their use of the clear value before we can change it. - * Fortunately, few applications ever change their depth clear - * value so this shouldn't happen often. - */ - brw_hiz_exec(brw, mt, level, layer, 1, ISL_AUX_OP_FULL_RESOLVE); - brw_miptree_set_aux_state(brw, mt, level, layer, 1, - ISL_AUX_STATE_RESOLVED); - } - } - - const union isl_color_value clear_color = { .f32 = {clear_value, } }; - brw_miptree_set_clear_color(brw, mt, clear_color); - } - - for (unsigned a = 0; a < num_layers; a++) { - enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, depth_irb->mt_level, - depth_irb->mt_layer + a); - - if (aux_state != ISL_AUX_STATE_CLEAR) { - brw_hiz_exec(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer + a, 1, - ISL_AUX_OP_FAST_CLEAR); - } - } - - brw_miptree_set_aux_state(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer, num_layers, - ISL_AUX_STATE_CLEAR); - return true; -} - -/** - * Called by ctx->Driver.Clear. 
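 * (Worked example of the quantization in brw_fast_clear_depth()
 *  above, assuming a 24-bit depth buffer, i.e. _DepthMax = 0xffffff,
 *  and a requested clear value of 0.3:
 *
 *     _mesa_lroundeven(0.3 * 16777215) = 5033164
 *     5033164 / 16777215.0f           ~= 0.29999997
 *
 *  so 0.29999997f, not 0.3f, is the value that the fast-clear color
 *  and all later depth testing must agree on.)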
- */ -static void -brw_clear(struct gl_context *ctx, GLbitfield mask) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - bool partial_clear = ctx->Scissor.EnableFlags && !noop_scissor(fb); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { - brw->front_buffer_dirty = true; - } - - brw_prepare_render(brw); - brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask); - - if (mask & BUFFER_BIT_DEPTH) { - if (brw_fast_clear_depth(ctx)) { - DBG("fast clear: depth\n"); - mask &= ~BUFFER_BIT_DEPTH; - } - } - - if (mask & BUFFER_BITS_COLOR) { - brw_blorp_clear_color(brw, fb, mask, partial_clear, - ctx->Color.sRGBEnabled); - debug_mask("blorp color", mask & BUFFER_BITS_COLOR); - mask &= ~BUFFER_BITS_COLOR; - } - - if (devinfo->ver >= 6 && (mask & BUFFER_BITS_DEPTH_STENCIL)) { - brw_blorp_clear_depth_stencil(brw, fb, mask, partial_clear); - debug_mask("blorp depth/stencil", mask & BUFFER_BITS_DEPTH_STENCIL); - mask &= ~BUFFER_BITS_DEPTH_STENCIL; - } - - GLbitfield tri_mask = mask & (BUFFER_BIT_STENCIL | - BUFFER_BIT_DEPTH); - - if (tri_mask) { - debug_mask("tri", tri_mask); - mask &= ~tri_mask; - _mesa_meta_glsl_Clear(&brw->ctx, tri_mask); - } - - /* Any strange buffers get passed off to swrast. The only thing that - * should be left at this point is the accumulation buffer. - */ - assert((mask & ~BUFFER_BIT_ACCUM) == 0); - if (mask) { - debug_mask("swrast", mask); - _swrast_Clear(ctx, mask); - } -} - - -void -brw_init_clear_functions(struct dd_function_table *functions) -{ - functions->Clear = brw_clear; -} diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c deleted file mode 100644 index fa97f9a..0000000 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/macros.h" -#include "main/enums.h" - -#include "brw_batch.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_state.h" -#include "compiler/brw_eu.h" - -#include "util/ralloc.h" - -static void -compile_clip_prog(struct brw_context *brw, struct brw_clip_prog_key *key) -{ - const unsigned *program; - void *mem_ctx; - unsigned program_size; - - mem_ctx = ralloc_context(NULL); - - struct brw_clip_prog_data prog_data; - program = brw_compile_clip(brw->screen->compiler, mem_ctx, key, &prog_data, - &brw->vue_map_geom_out, &program_size); - - brw_upload_cache(&brw->cache, - BRW_CACHE_CLIP_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->clip.prog_offset, &brw->clip.prog_data); - ralloc_free(mem_ctx); -} - -/* Calculate interpolants for triangle and line rasterization. - */ -void -brw_upload_clip_prog(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct brw_clip_prog_key key; - - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POLYGON | - _NEW_TRANSFORM, - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_VUE_MAP_GEOM_OUT)) - return; - - memset(&key, 0, sizeof(key)); - - /* Populate the key: - */ - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - if (wm_prog_data) { - key.contains_flat_varying = wm_prog_data->contains_flat_varying; - key.contains_noperspective_varying = - wm_prog_data->contains_noperspective_varying; - - STATIC_ASSERT(sizeof(key.interp_mode) == - sizeof(wm_prog_data->interp_mode)); - memcpy(key.interp_mode, wm_prog_data->interp_mode, - sizeof(key.interp_mode)); - } - - /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->reduced_primitive; - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - key.attrs = brw->vue_map_geom_out.slots_valid; - - /* _NEW_LIGHT */ - key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - /* _NEW_TRANSFORM (also part of VUE map)*/ - if (ctx->Transform.ClipPlanesEnabled) - key.nr_userclip = util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; - - if (devinfo->ver == 5) - key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP; - else - key.clip_mode = BRW_CLIP_MODE_NORMAL; - - /* _NEW_POLYGON */ - if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - key.clip_mode = BRW_CLIP_MODE_REJECT_ALL; - else { - GLuint fill_front = BRW_CLIP_FILL_MODE_CULL; - GLuint fill_back = BRW_CLIP_FILL_MODE_CULL; - GLuint offset_front = 0; - GLuint offset_back = 0; - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_FRONT) { - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - fill_front = BRW_CLIP_FILL_MODE_FILL; - offset_front = 0; - break; - case GL_LINE: - fill_front = BRW_CLIP_FILL_MODE_LINE; - offset_front = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_front = BRW_CLIP_FILL_MODE_POINT; - offset_front = ctx->Polygon.OffsetPoint; - break; - } - } - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_BACK) { - switch (ctx->Polygon.BackMode) { - case GL_FILL: - fill_back = BRW_CLIP_FILL_MODE_FILL; - offset_back = 0; - break; - case GL_LINE: - fill_back = BRW_CLIP_FILL_MODE_LINE; - offset_back = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - 
fill_back = BRW_CLIP_FILL_MODE_POINT; - offset_back = ctx->Polygon.OffsetPoint; - break; - } - } - - if (ctx->Polygon.BackMode != GL_FILL || - ctx->Polygon.FrontMode != GL_FILL) { - key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ - key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED; - - if (offset_back || offset_front) { - /* _NEW_POLYGON, _NEW_BUFFERS */ - key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2; - key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; - key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD; - } - - if (!brw->polygon_front_bit) { - key.fill_ccw = fill_front; - key.fill_cw = fill_back; - key.offset_ccw = offset_front; - key.offset_cw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_cw != BRW_CLIP_FILL_MODE_CULL) - key.copy_bfc_cw = 1; - } else { - key.fill_cw = fill_front; - key.fill_ccw = fill_back; - key.offset_cw = offset_front; - key.offset_ccw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_ccw != BRW_CLIP_FILL_MODE_CULL) - key.copy_bfc_ccw = 1; - } - } - } - } - - if (!brw_search_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &key, sizeof(key), - &brw->clip.prog_offset, &brw->clip.prog_data, true)) { - compile_clip_prog( brw, &key ); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c deleted file mode 100644 index 0b0ecbd..0000000 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include - -#include "main/condrender.h" -#include "main/mtypes.h" -#include "main/state.h" -#include "brw_context.h" -#include "brw_draw.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "brw_defines.h" - - -static void -brw_dispatch_compute_common(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - bool fail_next; - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (ctx->NewState) - _mesa_update_state(ctx); - - brw_validate_textures(brw); - - brw_predraw_resolve_inputs(brw, false, NULL); - - /* Flush the batch if the batch/state buffers are nearly full. We can - * grow them if needed, but this is not free, so we'd like to avoid it. 
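 * (Roadmap for the code below, added for clarity: the batch is
 *  snapshotted with brw_batch_save_state(); if the dispatched walker
 *  then fails the aperture check, the batch is rolled back, flushed
 *  and retried exactly once, and the fail_next flag turns a second
 *  failure into a WARN_ONCE rather than an endless retry loop.)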
- */ - brw_batch_require_space(brw, 600); - brw_require_statebuffer_space(brw, 2500); - brw_batch_save_state(brw); - fail_next = brw_batch_saved_state_is_empty(brw); - - retry: - brw->batch.no_wrap = true; - brw_upload_compute_state(brw); - - brw->vtbl.emit_compute_walker(brw); - - brw->batch.no_wrap = false; - - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!fail_next) { - brw_batch_reset_to_saved(brw); - brw_batch_flush(brw); - fail_next = true; - goto retry; - } else { - int ret = brw_batch_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: Single compute shader dispatch " - "exceeded available aperture space\n"); - } - } - - /* Now that we know we haven't run out of aperture space, we can safely - * reset the dirty bits. - */ - brw_compute_state_finished(brw); - - if (brw->always_flush_batch) - brw_batch_flush(brw); - - brw_program_cache_check_size(brw); - - /* Note: since compute shaders can't write to framebuffers, there's no need - * to call brw_postdraw_set_buffers_need_resolve(). - */ -} - -static void -brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { - struct brw_context *brw = brw_context(ctx); - - brw->compute.num_work_groups_bo = NULL; - brw->compute.num_work_groups = num_groups; - brw->compute.group_size = NULL; - ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; - - brw_dispatch_compute_common(ctx); -} - -static void -brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect) -{ - struct brw_context *brw = brw_context(ctx); - static const GLuint indirect_group_counts[3] = { 0, 0, 0 }; - struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer; - struct brw_bo *bo = - brw_bufferobj_buffer(brw, - brw_buffer_object(indirect_buffer), - indirect, 3 * sizeof(GLuint), false); - - brw->compute.num_work_groups_bo = bo; - brw->compute.num_work_groups_offset = indirect; - brw->compute.num_work_groups = indirect_group_counts; - brw->compute.group_size = NULL; - ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; - - brw_dispatch_compute_common(ctx); -} - -static void -brw_dispatch_compute_group_size(struct gl_context *ctx, - const GLuint *num_groups, - const GLuint *group_size) -{ - struct brw_context *brw = brw_context(ctx); - - brw->compute.num_work_groups_bo = NULL; - brw->compute.num_work_groups = num_groups; - brw->compute.group_size = group_size; - ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; - - brw_dispatch_compute_common(ctx); -} - -void -brw_init_compute_functions(struct dd_function_table *functions) -{ - functions->DispatchCompute = brw_dispatch_compute; - functions->DispatchComputeIndirect = brw_dispatch_compute_indirect; - functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size; -} diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c deleted file mode 100644 index 2736624..0000000 --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * 
paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Neil Roberts - */ - -/** @file brw_conditional_render.c - * - * Support for conditional rendering based on query objects - * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+. - */ - -#include "main/condrender.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" - -static void -set_predicate_enable(struct brw_context *brw, - bool value) -{ - if (value) - brw->predicate.state = BRW_PREDICATE_STATE_RENDER; - else - brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER; -} - -static void -set_predicate_for_overflow_query(struct brw_context *brw, - struct brw_query_object *query, - int stream_start, int count) -{ - if (!can_do_mi_math_and_lrr(brw->screen)) { - brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY; - return; - } - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; - - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - - hsw_overflow_result_to_gpr0(brw, query, count); - brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0)); - brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull); -} - -static void -set_predicate_for_occlusion_query(struct brw_context *brw, - struct brw_query_object *query) -{ - if (!brw->predicate.supported) { - brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY; - return; - } - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; - - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. 
- */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - - brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, 0 /* offset */); - brw_load_register_mem64(brw, MI_PREDICATE_SRC1, query->bo, 8 /* offset */); -} - -static void -set_predicate_for_result(struct brw_context *brw, - struct brw_query_object *query, - bool inverted) -{ - int load_op; - - assert(query->bo != NULL); - - switch (query->Base.Target) { - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - set_predicate_for_overflow_query(brw, query, 0, 1); - break; - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - set_predicate_for_overflow_query(brw, query, 0, MAX_VERTEX_STREAMS); - break; - default: - set_predicate_for_occlusion_query(brw, query); - } - - if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) { - if (inverted) - load_op = MI_PREDICATE_LOADOP_LOAD; - else - load_op = MI_PREDICATE_LOADOP_LOADINV; - - BEGIN_BATCH(1); - OUT_BATCH(GFX7_MI_PREDICATE | - load_op | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - } -} - -static void -brw_begin_conditional_render(struct gl_context *ctx, - struct gl_query_object *q, - GLenum mode) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *) q; - bool inverted; - - switch (mode) { - case GL_QUERY_WAIT: - case GL_QUERY_NO_WAIT: - case GL_QUERY_BY_REGION_WAIT: - case GL_QUERY_BY_REGION_NO_WAIT: - inverted = false; - break; - case GL_QUERY_WAIT_INVERTED: - case GL_QUERY_NO_WAIT_INVERTED: - case GL_QUERY_BY_REGION_WAIT_INVERTED: - case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: - inverted = true; - break; - default: - unreachable("Unexpected conditional render mode"); - } - - /* If there are already samples from a BLT operation or if the query object - * is ready then we can avoid looking at the values in the buffer and just - * decide whether to draw using the CPU without stalling. - */ - if (query->Base.Result || query->Base.Ready) - set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted); - else - set_predicate_for_result(brw, query, inverted); -} - -static void -brw_end_conditional_render(struct gl_context *ctx, - struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - - /* When there is no longer a conditional render in progress it should - * always render. - */ - brw->predicate.state = BRW_PREDICATE_STATE_RENDER; -} - -void -brw_init_conditional_render_functions(struct dd_function_table *functions) -{ - functions->BeginConditionalRender = brw_begin_conditional_render; - functions->EndConditionalRender = brw_end_conditional_render; -} - -bool -brw_check_conditional_render(struct brw_context *brw) -{ - if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) { - perf_debug("Conditional rendering is implemented in software and may " - "stall.\n"); - return _mesa_check_conditional_render(&brw->ctx); - } - - return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; -} diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c deleted file mode 100644 index af8b349..0000000 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ /dev/null @@ -1,1975 +0,0 @@ -/* - Copyright 2003 VMware, Inc. - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
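[brw_begin_conditional_render above collapses the eight GL conditional-render modes into a single inverted flag and, when the query result is already known on the CPU, skips GPU predication entirely: render exactly when (Result != 0) XOR inverted. A self-contained model of that fast path:]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* CPU-side fast path: render iff "samples passed" xor "inverted mode". */
static bool
render_enabled(uint64_t result, bool inverted)
{
   return (result != 0) ^ inverted;
}

int
main(void)
{
   assert( render_enabled(42, false));  /* GL_QUERY_WAIT, samples passed */
   assert(!render_enabled(0,  false));  /* GL_QUERY_WAIT, nothing passed */
   assert(!render_enabled(42, true));   /* *_INVERTED variants flip it */
   assert( render_enabled(0,  true));
   return 0;
}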
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "compiler/nir/nir.h" -#include "main/api_exec.h" -#include "main/context.h" -#include "main/fbobject.h" -#include "main/extensions.h" -#include "main/glthread.h" -#include "main/macros.h" -#include "main/points.h" -#include "main/version.h" -#include "main/vtxfmt.h" -#include "main/texobj.h" -#include "main/framebuffer.h" -#include "main/stencil.h" -#include "main/state.h" -#include "main/spirv_extensions.h" -#include "main/externalobjects.h" - -#include "vbo/vbo.h" - -#include "drivers/common/driverfuncs.h" -#include "drivers/common/meta.h" -#include "utils.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_blorp.h" -#include "brw_draw.h" -#include "brw_state.h" - -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_image.h" -#include "brw_tex.h" -#include "brw_tex_obj.h" - -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "tnl/t_pipeline.h" -#include "util/ralloc.h" -#include "util/debug.h" -#include "util/disk_cache.h" -#include "util/u_memory.h" -#include "isl/isl.h" - -#include "common/intel_defines.h" -#include "common/intel_uuid.h" - -#include "compiler/spirv/nir_spirv.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -const char *const brw_vendor_string = "Intel Open Source Technology Center"; - -const char * -brw_get_renderer_string(const struct brw_screen *screen) -{ - static char buf[128]; - const char *name = screen->devinfo.name; - - if (!name) - name = "Intel Unknown"; - - snprintf(buf, sizeof(buf), "Mesa DRI %s", name); - - return buf; -} - -static const GLubyte * -brw_get_string(struct gl_context * ctx, GLenum name) -{ - const struct brw_context *const brw = brw_context(ctx); - - switch (name) { - case GL_VENDOR: - return (GLubyte *) brw_vendor_string; - - case GL_RENDERER: - return - (GLubyte *) brw_get_renderer_string(brw->screen); - - default: - return NULL; - } -} - -static void -brw_set_background_context(struct gl_context *ctx, - UNUSED struct util_queue_monitoring *queue_info) -{ - struct brw_context *brw = brw_context(ctx); - __DRIcontext *driContext = brw->driContext; - __DRIscreen *driScreen = driContext->driScreenPriv; - 
const __DRIbackgroundCallableExtension *backgroundCallable = - driScreen->dri2.backgroundCallable; - - /* Note: Mesa will only call this function if we've called - * _mesa_enable_multithreading(). We only do that if the loader exposed - * the __DRI_BACKGROUND_CALLABLE extension. So we know that - * backgroundCallable is not NULL. - */ - backgroundCallable->setBackgroundContext(driContext->loaderPrivate); -} - -static struct gl_memory_object * -brw_new_memoryobj(struct gl_context *ctx, GLuint name) -{ - struct brw_memory_object *memory_object = CALLOC_STRUCT(brw_memory_object); - if (!memory_object) - return NULL; - - _mesa_initialize_memory_object(ctx, &memory_object->Base, name); - return &memory_object->Base; -} - -static void -brw_delete_memoryobj(struct gl_context *ctx, struct gl_memory_object *memObj) -{ - struct brw_memory_object *memory_object = brw_memory_object(memObj); - brw_bo_unreference(memory_object->bo); - _mesa_delete_memory_object(ctx, memObj); -} - -static void -brw_import_memoryobj_fd(struct gl_context *ctx, - struct gl_memory_object *obj, - GLuint64 size, - int fd) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_memory_object *memory_object = brw_memory_object(obj); - - memory_object->bo = brw_bo_gem_create_from_prime(brw->bufmgr, fd); - brw_bo_reference(memory_object->bo); - assert(memory_object->bo->size >= size); - close(fd); -} - -static void -brw_viewport(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - __DRIcontext *driContext = brw->driContext; - - if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { - if (driContext->driDrawablePriv) - dri2InvalidateDrawable(driContext->driDrawablePriv); - if (driContext->driReadablePriv) - dri2InvalidateDrawable(driContext->driReadablePriv); - } -} - -static void -brw_update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) -{ - struct brw_context *brw = brw_context(ctx); - - /* Quantize the derived default number of samples - */ - fb->DefaultGeometry._NumSamples = - brw_quantize_num_samples(brw->screen, fb->DefaultGeometry.NumSamples); -} - -static void -brw_update_state(struct gl_context * ctx) -{ - GLuint new_state = ctx->NewState; - struct brw_context *brw = brw_context(ctx); - - if (ctx->swrast_context) - _swrast_InvalidateState(ctx, new_state); - - brw->NewGLState |= new_state; - - if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) - _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); - - if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) { - brw->stencil_enabled = _mesa_stencil_is_enabled(ctx); - brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx); - brw->stencil_write_enabled = - _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided); - } - - if (new_state & _NEW_POLYGON) - brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx); - - if (new_state & _NEW_BUFFERS) { - brw_update_framebuffer(ctx, ctx->DrawBuffer); - if (ctx->DrawBuffer != ctx->ReadBuffer) - brw_update_framebuffer(ctx, ctx->ReadBuffer); - } -} - -#define flushFront(screen) ((screen)->image.loader ? 
(screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) - -static void -brw_flush_front(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - __DRIcontext *driContext = brw->driContext; - __DRIdrawable *driDrawable = driContext->driDrawablePriv; - __DRIscreen *const dri_screen = brw->screen->driScrnPriv; - - if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { - if (flushFront(dri_screen) && driDrawable && - driDrawable->loaderPrivate) { - - /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT. - * - * This potentially resolves both front and back buffer. It - * is unnecessary to resolve the back, but harms nothing except - * performance. And no one cares about front-buffer render - * performance. - */ - brw_resolve_for_dri2_flush(brw, driDrawable); - brw_batch_flush(brw); - - flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate); - - /* We set the dirty bit in brw_prepare_render() if we're - * front buffer rendering once we get there. - */ - brw->front_buffer_dirty = false; - } - } -} - -static void -brw_display_shared_buffer(struct brw_context *brw) -{ - __DRIcontext *dri_context = brw->driContext; - __DRIdrawable *dri_drawable = dri_context->driDrawablePriv; - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - int fence_fd = -1; - - if (!brw->is_shared_buffer_bound) - return; - - if (!brw->is_shared_buffer_dirty) - return; - - if (brw->screen->has_exec_fence) { - /* This function is always called during a flush operation, so there is - * no need to flush again here. But we want to provide a fence_fd to the - * loader, and a redundant flush is the easiest way to acquire one. - */ - if (brw_batch_flush_fence(brw, -1, &fence_fd)) - return; - } - - dri_screen->mutableRenderBuffer.loader - ->displaySharedBuffer(dri_drawable, fence_fd, - dri_drawable->loaderPrivate); - brw->is_shared_buffer_dirty = false; -} - -static void -brw_glFlush(struct gl_context *ctx, unsigned gallium_flush_flags) -{ - struct brw_context *brw = brw_context(ctx); - - brw_batch_flush(brw); - brw_flush_front(ctx); - brw_display_shared_buffer(brw); - brw->need_flush_throttle = true; -} - -static void -brw_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state) -{ - struct brw_context *brw = brw_context(ctx); - - switch (cap) { - case GL_BLACKHOLE_RENDER_INTEL: - brw->frontend_noop = state; - brw_batch_flush(brw); - brw_batch_maybe_noop(brw); - /* Because we started previous batches with a potential - * MI_BATCH_BUFFER_END if NOOP was enabled, that means that anything - * that was ever emitted after that never made it to the HW. So when the - * blackhole state changes from NOOP->!NOOP reupload the entire state. 
- */ - if (!brw->frontend_noop) { - brw->NewGLState = ~0u; - brw->ctx.NewDriverState = ~0ull; - } - break; - default: - break; - } -} - -static void -brw_finish(struct gl_context * ctx) -{ - struct brw_context *brw = brw_context(ctx); - - brw_glFlush(ctx, 0); - - if (brw->batch.last_bo) - brw_bo_wait_rendering(brw->batch.last_bo); -} - -static void -brw_get_device_uuid(struct gl_context *ctx, char *uuid) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_screen *screen = brw->screen; - - assert(GL_UUID_SIZE_EXT >= PIPE_UUID_SIZE); - memset(uuid, 0, GL_UUID_SIZE_EXT); - intel_uuid_compute_device_id((uint8_t *)uuid, &screen->isl_dev, PIPE_UUID_SIZE); -} - - -static void -brw_get_driver_uuid(struct gl_context *ctx, char *uuid) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_screen *screen = brw->screen; - - assert(GL_UUID_SIZE_EXT >= PIPE_UUID_SIZE); - memset(uuid, 0, GL_UUID_SIZE_EXT); - intel_uuid_compute_driver_id((uint8_t *)uuid, &screen->devinfo, PIPE_UUID_SIZE); -} - -static void -brw_init_driver_functions(struct brw_context *brw, - struct dd_function_table *functions) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - _mesa_init_driver_functions(functions); - - /* GLX uses DRI2 invalidate events to handle window resizing. - * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib), - * which doesn't provide a mechanism for snooping the event queues. - * - * So EGL still relies on viewport hacks to handle window resizing. - * This should go away with DRI3000. - */ - if (!brw->driContext->driScreenPriv->dri2.useInvalidate) - functions->Viewport = brw_viewport; - - functions->Enable = brw_glEnable; - functions->Flush = brw_glFlush; - functions->Finish = brw_finish; - functions->GetString = brw_get_string; - functions->UpdateState = brw_update_state; - - brw_init_draw_functions(functions); - brw_init_texture_functions(functions); - brw_init_texture_image_functions(functions); - brw_init_texture_copy_image_functions(functions); - brw_init_copy_image_functions(functions); - brw_init_clear_functions(functions); - brw_init_buffer_functions(functions); - brw_init_pixel_functions(functions); - brw_init_buffer_object_functions(functions); - brw_init_syncobj_functions(functions); - brw_init_object_purgeable_functions(functions); - - brw_init_frag_prog_functions(functions); - brw_init_common_queryobj_functions(functions); - if (devinfo->verx10 >= 75) - hsw_init_queryobj_functions(functions); - else if (devinfo->ver >= 6) - gfx6_init_queryobj_functions(functions); - else - gfx4_init_queryobj_functions(functions); - brw_init_compute_functions(functions); - brw_init_conditional_render_functions(functions); - - functions->GenerateMipmap = brw_generate_mipmap; - - functions->QueryInternalFormat = brw_query_internal_format; - - functions->NewTransformFeedback = brw_new_transform_feedback; - functions->DeleteTransformFeedback = brw_delete_transform_feedback; - if (can_do_mi_math_and_lrr(brw->screen)) { - functions->BeginTransformFeedback = hsw_begin_transform_feedback; - functions->EndTransformFeedback = hsw_end_transform_feedback; - functions->PauseTransformFeedback = hsw_pause_transform_feedback; - functions->ResumeTransformFeedback = hsw_resume_transform_feedback; - } else if (devinfo->ver >= 7) { - functions->BeginTransformFeedback = gfx7_begin_transform_feedback; - functions->EndTransformFeedback = gfx7_end_transform_feedback; - functions->PauseTransformFeedback = gfx7_pause_transform_feedback; - functions->ResumeTransformFeedback = 
gfx7_resume_transform_feedback; - functions->GetTransformFeedbackVertexCount = - brw_get_transform_feedback_vertex_count; - } else { - functions->BeginTransformFeedback = brw_begin_transform_feedback; - functions->EndTransformFeedback = brw_end_transform_feedback; - functions->PauseTransformFeedback = brw_pause_transform_feedback; - functions->ResumeTransformFeedback = brw_resume_transform_feedback; - functions->GetTransformFeedbackVertexCount = - brw_get_transform_feedback_vertex_count; - } - - if (devinfo->ver >= 6) - functions->GetSamplePosition = gfx6_get_sample_position; - - /* GL_ARB_get_program_binary */ - brw_program_binary_init(brw->screen->deviceID); - functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1; - functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary; - functions->ProgramBinaryDeserializeDriverBlob = - brw_deserialize_program_binary; - - if (brw->screen->disk_cache) { - functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir; - } - - functions->SetBackgroundContext = brw_set_background_context; - - functions->NewMemoryObject = brw_new_memoryobj; - functions->DeleteMemoryObject = brw_delete_memoryobj; - functions->ImportMemoryObjectFd = brw_import_memoryobj_fd; - functions->GetDeviceUuid = brw_get_device_uuid; - functions->GetDriverUuid = brw_get_driver_uuid; -} - -static void -brw_initialize_spirv_supported_capabilities(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - /* The following SPIR-V capabilities are only supported on gfx7+. In theory - * you should enable the extension only on gfx7+, but just in case let's - * assert it. - */ - assert(devinfo->ver >= 7); - - ctx->Const.SpirVCapabilities.atomic_storage = devinfo->ver >= 7; - ctx->Const.SpirVCapabilities.draw_parameters = true; - ctx->Const.SpirVCapabilities.float64 = devinfo->ver >= 8; - ctx->Const.SpirVCapabilities.geometry_streams = devinfo->ver >= 7; - ctx->Const.SpirVCapabilities.image_write_without_format = true; - ctx->Const.SpirVCapabilities.int64 = devinfo->ver >= 8; - ctx->Const.SpirVCapabilities.tessellation = true; - ctx->Const.SpirVCapabilities.transform_feedback = devinfo->ver >= 7; - ctx->Const.SpirVCapabilities.variable_pointers = true; - ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->ver >= 8; -} - -static void -brw_initialize_context_constants(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - const struct brw_compiler *compiler = brw->screen->compiler; - - const bool stage_exists[MESA_SHADER_STAGES] = { - [MESA_SHADER_VERTEX] = true, - [MESA_SHADER_TESS_CTRL] = devinfo->ver >= 7, - [MESA_SHADER_TESS_EVAL] = devinfo->ver >= 7, - [MESA_SHADER_GEOMETRY] = devinfo->ver >= 6, - [MESA_SHADER_FRAGMENT] = true, - [MESA_SHADER_COMPUTE] = - (_mesa_is_desktop_gl(ctx) && - ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || - (ctx->API == API_OPENGLES2 && - ctx->Const.MaxComputeWorkGroupSize[0] >= 128), - }; - - unsigned num_stages = 0; - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (stage_exists[i]) - num_stages++; - } - - unsigned max_samplers = - devinfo->verx10 >= 75 ? 
BRW_MAX_TEX_UNIT : 16; - - ctx->Const.MaxDualSourceDrawBuffers = 1; - ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; - ctx->Const.MaxCombinedShaderOutputResources = - MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; - - /* The timestamp register we can read for glGetTimestamp() is - * sometimes only 32 bits, before scaling to nanoseconds (depending - * on the kernel). - * - * Once scaled to nanoseconds the timestamp would roll over at a - * non-power-of-two, so an application couldn't use - * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we - * report 36 bits and truncate at that (rolling over 5 times as - * often as the HW counter), and when the 32-bit counter rolls - * over, it happens to also be at a rollover in the reported value - * from near (1<<36) to 0. - * - * The low 32 bits roll over in ~343 seconds. Our 36-bit result - * rolls over every ~69 seconds. - */ - ctx->Const.QueryCounterBits.Timestamp = 36; - - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ - ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - if (devinfo->ver >= 7) { - ctx->Const.MaxRenderbufferSize = 16384; - ctx->Const.MaxTextureSize = 16384; - ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */ - } else { - ctx->Const.MaxRenderbufferSize = 8192; - ctx->Const.MaxTextureSize = 8192; - ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ - } - ctx->Const.Max3DTextureLevels = 12; /* 2048 */ - ctx->Const.MaxArrayTextureLayers = devinfo->ver >= 7 ? 2048 : 512; - ctx->Const.MaxTextureMbytes = 1536; - ctx->Const.MaxTextureRectSize = devinfo->ver >= 7 ? 16384 : 8192; - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 15.0; - ctx->Const.StripTextureBorder = true; - if (devinfo->ver >= 7) { - ctx->Const.MaxProgramTextureGatherComponents = 4; - ctx->Const.MinProgramTextureGatherOffset = -32; - ctx->Const.MaxProgramTextureGatherOffset = 31; - } else if (devinfo->ver == 6) { - ctx->Const.MaxProgramTextureGatherComponents = 1; - ctx->Const.MinProgramTextureGatherOffset = -8; - ctx->Const.MaxProgramTextureGatherOffset = 7; - } - - ctx->Const.MaxUniformBlockSize = 65536; - - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_program_constants *prog = &ctx->Const.Program[i]; - - if (!stage_exists[i]) - continue; - - prog->MaxTextureImageUnits = max_samplers; - - prog->MaxUniformBlocks = BRW_MAX_UBO; - prog->MaxCombinedUniformComponents = - prog->MaxUniformComponents + - ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; - - prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - prog->MaxAtomicBuffers = BRW_MAX_ABO; - prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0; - prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; - } - - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureCoordUnits, - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); - - ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; - ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; - ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; - ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; - ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; - ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; - ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; - - - /* Hardware only supports a limited number of transform feedback buffers. - * So we need to override the Mesa default (which is based only on software - * limits). 
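[The 36-bit choice explained in the timestamp comment above is easy to sanity-check. Assuming the 80 ns timestamp tick implied by the ~343 s figure, the raw 32-bit register wraps every 2^32 ticks, while the nanosecond-scaled value truncated to 36 bits wraps every 2^36 ns, roughly five times as often:]

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
   /* 80 ns per tick is an assumption taken from the ~343 s figure
    * in the comment above (2^32 * 80 ns ~ 343.6 s). */
   double hw_wrap_s = (double)(1ull << 32) * 80e-9;       /* ~343.6 s */
   double reported_wrap_s = (double)(1ull << 36) * 1e-9;  /* ~68.7 s */

   printf("hw: %.1f s, reported: %.1f s, ratio: %.1f\n",
          hw_wrap_s, reported_wrap_s, hw_wrap_s / reported_wrap_s);

   /* Truncating the scaled value to 36 bits is just a mask: */
   uint64_t ns = 123456789123ull;
   uint64_t reported = ns & ((1ull << 36) - 1);
   (void)reported;
   return 0;
}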
- */ - ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS; - - /* On Gfx6, in the worst case, we use up one binding table entry per - * transform feedback component (see comments above the definition of - * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value - * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to - * BRW_MAX_SOL_BINDINGS. - * - * In "separate components" mode, we need to divide this value by - * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries - * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS. - */ - ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS; - ctx->Const.MaxTransformFeedbackSeparateComponents = - BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; - - ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = - !can_do_mi_math_and_lrr(brw->screen); - - int max_samples; - const int *msaa_modes = brw_supported_msaa_modes(brw->screen); - const int clamp_max_samples = - driQueryOptioni(&brw->screen->optionCache, "clamp_max_samples"); - - if (clamp_max_samples < 0) { - max_samples = msaa_modes[0]; - } else { - /* Select the largest supported MSAA mode that does not exceed - * clamp_max_samples. - */ - max_samples = 0; - for (int i = 0; msaa_modes[i] != 0; ++i) { - if (msaa_modes[i] <= clamp_max_samples) { - max_samples = msaa_modes[i]; - break; - } - } - } - - ctx->Const.MaxSamples = max_samples; - ctx->Const.MaxColorTextureSamples = max_samples; - ctx->Const.MaxDepthTextureSamples = max_samples; - ctx->Const.MaxIntegerSamples = max_samples; - ctx->Const.MaxImageSamples = 0; - - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - if (devinfo->ver >= 6) { - ctx->Const.MaxLineWidth = 7.375; - ctx->Const.MaxLineWidthAA = 7.375; - ctx->Const.LineWidthGranularity = 0.125; - } else { - ctx->Const.MaxLineWidth = 7.0; - ctx->Const.MaxLineWidthAA = 7.0; - ctx->Const.LineWidthGranularity = 0.5; - } - - /* For non-antialiased lines, we have to round the line width to the - * nearest whole number. Make sure that we don't advertise a line - * width that, when rounded, will be beyond the actual hardware - * maximum. 
- */ - assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = 255.0; - ctx->Const.MaxPointSizeAA = 255.0; - ctx->Const.PointSizeGranularity = 1.0; - - if (devinfo->verx10 >= 45) - ctx->Const.MaxClipPlanes = 8; - - ctx->Const.GLSLFragCoordIsSysVal = true; - ctx->Const.GLSLFrontFacingIsSysVal = true; - ctx->Const.GLSLTessLevelsAsInputs = true; - ctx->Const.PrimitiveRestartForPatches = true; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams = - MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters, - ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams); - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams = - MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters, - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams); - - /* Fragment shaders use real, 32-bit two's-complement integers for all - * integer types. - */ - ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31; - ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30; - ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; - - ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31; - ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30; - ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - - /* Gfx6 converts quads to polygons at the beginning of the 3D pipeline, - * but we're not sure how that's actually done for vertex order, - * which affects the provoking vertex decision. Always use the last vertex - * convention for quad primitives, which works as expected for now. 
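[The integer precision values set above (RangeMin = 31, RangeMax = 30, Precision = 0) follow from how glGetShaderPrecisionFormat encodes ranges as log2 of the magnitude: exact 32-bit two's complement holds -2^31 .. 2^31 - 1, and floor(log2(2^31 - 1)) is 30. A small check, assuming a GCC-style __builtin_clzll:]

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   int64_t min = -((int64_t)1 << 31);     /* log2(|min|) == 31 -> RangeMin */
   int64_t max = ((int64_t)1 << 31) - 1;  /* floor(log2(max)) == 30 -> RangeMax */

   assert(63 - __builtin_clzll((uint64_t)-min) == 31);
   assert(63 - __builtin_clzll((uint64_t)max) == 30);
   /* Precision == 0: every integer in the range is represented exactly. */
   return 0;
}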
- */ - if (devinfo->ver >= 6) - ctx->Const.QuadsFollowProvokingVertexConvention = false; - - ctx->Const.NativeIntegers = true; - - /* Regarding the CMP instruction, the Ivybridge PRM says: - * - * "For each enabled channel 0b or 1b is assigned to the appropriate flag - * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord - * 0xFFFFFFFF) is assigned to dst." - * - * but PRMs for earlier generations say - * - * "In dword format, one GRF may store up to 8 results. When the register - * is used later as a vector of Booleans, as only LSB at each channel - * contains meaning [sic] data, software should make sure all higher bits - * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." - * - * We select the representation of a true boolean uniform to be ~0, and fix - * the results of Gen <= 5 CMP instruction's with -(result & 1). - */ - ctx->Const.UniformBooleanTrue = ~0; - - /* From the gfx4 PRM, volume 4 page 127: - * - * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies - * the base address of the first element of the surface, computed in - * software by adding the surface base address to the byte offset of - * the element in the buffer." - * - * However, unaligned accesses are slower, so enforce buffer alignment. - * - * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional - * restriction: the start of the buffer needs to be 32B aligned. - */ - ctx->Const.UniformBufferOffsetAlignment = 32; - - /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so - * that we can safely have the CPU and GPU writing the same SSBO on - * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never - * writes, so there's no problem. For an SSBO, the GPU and the CPU can - * be updating disjoint regions of the buffer simultaneously and that will - * break if the regions overlap the same cacheline. - */ - ctx->Const.ShaderStorageBufferOffsetAlignment = 64; - ctx->Const.TextureBufferOffsetAlignment = 16; - ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; - - if (devinfo->ver >= 6) { - ctx->Const.MaxVarying = 32; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = - compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128; - } - - /* We want the GLSL compiler to emit code that uses condition codes */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - ctx->Const.ShaderCompilerOptions[i] = - brw->screen->compiler->glsl_compiler_options[i]; - } - - if (devinfo->ver >= 7) { - ctx->Const.MaxViewportWidth = 32768; - ctx->Const.MaxViewportHeight = 32768; - } - - /* ARB_viewport_array, OES_viewport_array */ - if (devinfo->ver >= 6) { - ctx->Const.MaxViewports = GFX6_NUM_VIEWPORTS; - ctx->Const.ViewportSubpixelBits = 8; - - /* Cast to float before negating because MaxViewportWidth is unsigned. 
- */ - ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; - ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; - } - - /* ARB_gpu_shader5 */ - if (devinfo->ver >= 7) - ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); - - /* ARB_framebuffer_no_attachments */ - ctx->Const.MaxFramebufferWidth = 16384; - ctx->Const.MaxFramebufferHeight = 16384; - ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; - ctx->Const.MaxFramebufferSamples = max_samples; - - /* OES_primitive_bounding_box */ - ctx->Const.NoPrimitiveBoundingBoxOutput = true; - - /* TODO: We should be able to use STD430 packing by default on all hardware - * but some piglit tests [1] currently fail on SNB when this is enabled. - * The problem is the messages we're using for doing uniform pulls - * in the vec4 back-end on SNB is the OWORD block load instruction, which - * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the - * sampler which doesn't have these restrictions. - * - * In the scalar back-end, we use the sampler for dynamic uniform loads and - * pull an entire cache line at a time for constant offset loads both of - * which support almost any alignment. - * - * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test - */ - if (devinfo->ver >= 7) - ctx->Const.UseSTD430AsDefaultPacking = true; - - if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) - ctx->Const.AllowMappedBuffersDuringExecution = true; - - /* GL_ARB_get_program_binary */ - ctx->Const.NumProgramBinaryFormats = 1; -} - -static void -brw_initialize_cs_context_constants(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Maximum number of scalar compute shader invocations that can be run in - * parallel in the same subslice assuming SIMD32 dispatch. - */ - const unsigned max_threads = devinfo->max_cs_workgroup_threads; - const uint32_t max_invocations = 32 * max_threads; - ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; - ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; - ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; - ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; - ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; - - /* Constants used for ARB_compute_variable_group_size. */ - if (devinfo->ver >= 7) { - assert(max_invocations >= 512); - ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations; - ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations; - ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations; - ctx->Const.MaxComputeVariableGroupInvocations = max_invocations; - } -} - -/** - * Process driconf (drirc) options, setting appropriate context flags. - * - * brw_init_extensions still pokes at optionCache directly, in order to - * avoid advertising various extensions. No flags are set, so it makes - * sense to continue doing that there. - */ -static void -brw_process_driconf_options(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - const driOptionCache *const options = &brw->screen->optionCache; - - if (INTEL_DEBUG(DEBUG_NO_HIZ)) { - brw->has_hiz = false; - /* On gfx6, you can only do separate stencil with HIZ. 
*/ - if (devinfo->ver == 6) - brw->has_separate_stencil = false; - } - - if (driQueryOptionb(options, "mesa_no_error")) - ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR; - - if (driQueryOptionb(options, "always_flush_batch")) { - fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); - brw->always_flush_batch = true; - } - - if (driQueryOptionb(options, "always_flush_cache")) { - fprintf(stderr, "flushing GPU caches before/after each draw call\n"); - brw->always_flush_cache = true; - } - - if (driQueryOptionb(options, "disable_throttling")) { - fprintf(stderr, "disabling flush throttling\n"); - brw->disable_throttling = true; - } - - brw->precompile = driQueryOptionb(&brw->screen->optionCache, "shader_precompile"); - - if (driQueryOptionb(&brw->screen->optionCache, "precise_trig")) - brw->screen->compiler->precise_trig = true; - - ctx->Const.ForceGLSLExtensionsWarn = - driQueryOptionb(options, "force_glsl_extensions_warn"); - - ctx->Const.ForceGLSLVersion = - driQueryOptioni(options, "force_glsl_version"); - - ctx->Const.DisableGLSLLineContinuations = - driQueryOptionb(options, "disable_glsl_line_continuations"); - - ctx->Const.AllowGLSLExtensionDirectiveMidShader = - driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); - - ctx->Const.AllowGLSLBuiltinVariableRedeclaration = - driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration"); - - ctx->Const.AllowHigherCompatVersion = - driQueryOptionb(options, "allow_higher_compat_version"); - - ctx->Const.ForceGLSLAbsSqrt = - driQueryOptionb(options, "force_glsl_abs_sqrt"); - - ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init") ? 1 : 0; - - brw->dual_color_blend_by_location = - driQueryOptionb(options, "dual_color_blend_by_location"); - - ctx->Const.AllowGLSLCrossStageInterpolationMismatch = - driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch"); - - char *vendor_str = driQueryOptionstr(options, "force_gl_vendor"); - /* not an empty string */ - if (*vendor_str) - ctx->Const.VendorOverride = vendor_str; - - ctx->Const.dri_config_options_sha1 = - ralloc_array(brw->mem_ctx, unsigned char, 20); - driComputeOptionsSha1(&brw->screen->optionCache, - ctx->Const.dri_config_options_sha1); -} - -GLboolean -brw_create_context(gl_api api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - const struct __DriverContextConfig *ctx_config, - unsigned *dri_ctx_error, - void *sharedContextPrivate) -{ - struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; - struct brw_screen *screen = driContextPriv->driScreenPriv->driverPrivate; - const struct intel_device_info *devinfo = &screen->devinfo; - struct dd_function_table functions; - - /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel - * provides us with context reset notifications. 
- */ - uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG | - __DRI_CTX_FLAG_FORWARD_COMPATIBLE | - __DRI_CTX_FLAG_NO_ERROR; - - if (screen->has_context_reset_notification) - allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS; - - if (ctx_config->flags & ~allowed_flags) { - *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG; - return false; - } - - if (ctx_config->attribute_mask & - ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY | - __DRIVER_CONTEXT_ATTRIB_PRIORITY)) { - *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; - return false; - } - - bool notify_reset = - ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) && - ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION); - - struct brw_context *brw = align_calloc(sizeof(struct brw_context), 16); - if (!brw) { - fprintf(stderr, "%s: failed to alloc context\n", __func__); - *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - return false; - } - brw->mem_ctx = ralloc_context(NULL); - brw->perf_ctx = intel_perf_new_context(brw->mem_ctx); - - driContextPriv->driverPrivate = brw; - brw->driContext = driContextPriv; - brw->screen = screen; - brw->bufmgr = screen->bufmgr; - - brw->has_hiz = devinfo->has_hiz_and_separate_stencil; - brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; - - /* We don't push UBOs on IVB and earlier because the restrictions on - * 3DSTATE_CONSTANT_* make it really annoying to use push constants - * without dynamic state base address. - */ - brw->can_push_ubos = devinfo->verx10 >= 75; - - brw->isl_dev = screen->isl_dev; - - brw->vs.base.stage = MESA_SHADER_VERTEX; - brw->tcs.base.stage = MESA_SHADER_TESS_CTRL; - brw->tes.base.stage = MESA_SHADER_TESS_EVAL; - brw->gs.base.stage = MESA_SHADER_GEOMETRY; - brw->wm.base.stage = MESA_SHADER_FRAGMENT; - brw->cs.base.stage = MESA_SHADER_COMPUTE; - - brw_init_driver_functions(brw, &functions); - - if (notify_reset) - functions.GetGraphicsResetStatus = brw_get_graphics_reset_status; - - brw_process_driconf_options(brw); - - if (api == API_OPENGL_CORE && - driQueryOptionb(&screen->optionCache, "force_compat_profile")) { - api = API_OPENGL_COMPAT; - } - - struct gl_context *ctx = &brw->ctx; - - if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) { - *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - fprintf(stderr, "%s: failed to init mesa context\n", __func__); - brw_destroy_context(driContextPriv); - return false; - } - - driContextSetFlags(ctx, ctx_config->flags); - - /* Initialize the software rasterizer and helper modules. - * - * As of GL 3.1 core, the gfx4+ driver doesn't need the swrast context for - * software fallbacks (which we have to support on legacy GL to do weird - * glDrawPixels(), glBitmap(), and other functions). - */ - if (api != API_OPENGL_CORE && api != API_OPENGLES2) { - _swrast_CreateContext(ctx); - } - - _vbo_CreateContext(ctx, true); - if (ctx->swrast_context) { - _tnl_CreateContext(ctx); - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - _swsetup_CreateContext(ctx); - - /* Configure swrast to match hardware characteristics: */ - _swrast_allow_pixel_fog(ctx, false); - _swrast_allow_vertex_fog(ctx, true); - } - - _mesa_meta_init(ctx); - - if (INTEL_DEBUG(DEBUG_PERF)) - brw->perf_debug = true; - - brw_initialize_cs_context_constants(brw); - brw_initialize_context_constants(brw); - - ctx->Const.ResetStrategy = notify_reset - ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB; - - /* Reinitialize the context point state. It depends on ctx->Const values. 
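[The ctx_config->flags check above is the standard whitelist test: reject the context if any bit survives masking off the allowed set, which is what produces __DRI_CTX_ERROR_UNKNOWN_FLAG. A sketch with illustrative flag values, not the real __DRI_CTX_FLAG_* constants:]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define FLAG_DEBUG      (1u << 0)   /* illustrative values only */
#define FLAG_FWD_COMPAT (1u << 1)
#define FLAG_ROBUST     (1u << 2)

static bool
flags_allowed(uint32_t flags, uint32_t allowed)
{
   return (flags & ~allowed) == 0;  /* no unknown bits set */
}

int
main(void)
{
   uint32_t allowed = FLAG_DEBUG | FLAG_FWD_COMPAT;
   assert(flags_allowed(FLAG_DEBUG, allowed));
   assert(!flags_allowed(FLAG_ROBUST, allowed)); /* -> UNKNOWN_FLAG */
   return 0;
}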
*/ - _mesa_init_point(ctx); - - brw_fbo_init(brw); - - brw_batch_init(brw); - - /* Create a new hardware context. Using a hardware context means that - * our GPU state will be saved/restored on context switch, allowing us - * to assume that the GPU is in the same state we left it in. - * - * This is required for transform feedback buffer offsets, query objects, - * and also allows us to reduce how much state we have to emit. - */ - brw->hw_ctx = brw_create_hw_context(brw->bufmgr); - if (!brw->hw_ctx && devinfo->ver >= 6) { - fprintf(stderr, "Failed to create hardware context.\n"); - brw_destroy_context(driContextPriv); - return false; - } - - if (brw->hw_ctx) { - int hw_priority = INTEL_CONTEXT_MEDIUM_PRIORITY; - if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) { - switch (ctx_config->priority) { - case __DRI_CTX_PRIORITY_LOW: - hw_priority = INTEL_CONTEXT_LOW_PRIORITY; - break; - case __DRI_CTX_PRIORITY_HIGH: - hw_priority = INTEL_CONTEXT_HIGH_PRIORITY; - break; - } - } - if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY && - brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) { - fprintf(stderr, - "Failed to set priority [%d:%d] for hardware context.\n", - ctx_config->priority, hw_priority); - brw_destroy_context(driContextPriv); - return false; - } - } - - if (brw_init_pipe_control(brw, devinfo)) { - *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - brw_destroy_context(driContextPriv); - return false; - } - - brw_upload_init(&brw->upload, brw->bufmgr, 65536); - - brw_init_state(brw); - - brw_init_extensions(ctx); - - brw_init_surface_formats(brw); - - brw_blorp_init(brw); - - brw->urb.size = devinfo->urb.size; - - if (devinfo->ver == 6) - brw->urb.gs_present = false; - - brw->prim_restart.in_progress = false; - brw->prim_restart.enable_cut_index = false; - brw->gs.enabled = false; - brw->clip.viewport_count = 1; - - brw->predicate.state = BRW_PREDICATE_STATE_RENDER; - - brw->max_gtt_map_object_size = screen->max_gtt_map_object_size; - - ctx->VertexProgram._MaintainTnlProgram = true; - ctx->FragmentProgram._MaintainTexEnvProgram = true; - _mesa_reset_vertex_processing_mode(ctx); - - brw_draw_init( brw ); - - if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) { - /* Turn on some extra GL_ARB_debug_output generation. */ - brw->perf_debug = true; - } - - if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) { - ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; - ctx->Const.RobustAccess = GL_TRUE; - } - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) - brw_init_shader_time(brw); - - _mesa_override_extensions(ctx); - _mesa_compute_version(ctx); - -#ifndef NDEBUG - /* Enforce that the version of the context that was created is at least as - * high as the version that was advertised via GLX / EGL / whatever window - * system. 
- */ - const __DRIscreen *const dri_screen = brw->screen->driScrnPriv; - - switch (api) { - case API_OPENGL_COMPAT: - assert(ctx->Version >= dri_screen->max_gl_compat_version); - break; - case API_OPENGLES: - assert(ctx->Version >= dri_screen->max_gl_es1_version); - break; - case API_OPENGLES2: - assert(ctx->Version >= dri_screen->max_gl_es2_version); - break; - case API_OPENGL_CORE: - assert(ctx->Version >= dri_screen->max_gl_core_version); - break; - } -#endif - - /* GL_ARB_gl_spirv */ - if (ctx->Extensions.ARB_gl_spirv) { - brw_initialize_spirv_supported_capabilities(brw); - - if (ctx->Extensions.ARB_spirv_extensions) { - /* GL_ARB_spirv_extensions */ - ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions); - _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions, - &ctx->Const.SpirVCapabilities); - } - } - - _mesa_initialize_dispatch_tables(ctx); - _mesa_initialize_vbo_vtxfmt(ctx); - - if (ctx->Extensions.INTEL_performance_query) - brw_init_performance_queries(brw); - - brw->ctx.Cache = brw->screen->disk_cache; - - if (driContextPriv->driScreenPriv->dri2.backgroundCallable && - driQueryOptionb(&screen->optionCache, "mesa_glthread")) { - /* Loader supports multithreading, and so do we. */ - _mesa_glthread_init(ctx); - } - - return true; -} - -void -brw_destroy_context(__DRIcontext *driContextPriv) -{ - struct brw_context *brw = - (struct brw_context *) driContextPriv->driverPrivate; - struct gl_context *ctx = &brw->ctx; - - GET_CURRENT_CONTEXT(curctx); - - if (curctx == NULL) { - /* No current context, but we need one to release - * renderbuffer surface when we release framebuffer. - * So temporarily bind the context. - */ - _mesa_make_current(ctx, NULL, NULL); - } - - _mesa_glthread_destroy(&brw->ctx); - - _mesa_meta_free(&brw->ctx); - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - /* Force a report. 
*/ - brw->shader_time.report_time = 0; - - brw_collect_and_report_shader_time(brw); - brw_destroy_shader_time(brw); - } - - blorp_finish(&brw->blorp); - - brw_destroy_state(brw); - brw_draw_destroy(brw); - - brw_bo_unreference(brw->curbe.curbe_bo); - - brw_bo_unreference(brw->vs.base.scratch_bo); - brw_bo_unreference(brw->tcs.base.scratch_bo); - brw_bo_unreference(brw->tes.base.scratch_bo); - brw_bo_unreference(brw->gs.base.scratch_bo); - brw_bo_unreference(brw->wm.base.scratch_bo); - - brw_bo_unreference(brw->vs.base.push_const_bo); - brw_bo_unreference(brw->tcs.base.push_const_bo); - brw_bo_unreference(brw->tes.base.push_const_bo); - brw_bo_unreference(brw->gs.base.push_const_bo); - brw_bo_unreference(brw->wm.base.push_const_bo); - - brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx); - - if (ctx->swrast_context) { - _swsetup_DestroyContext(&brw->ctx); - _tnl_DestroyContext(&brw->ctx); - } - _vbo_DestroyContext(&brw->ctx); - - if (ctx->swrast_context) - _swrast_DestroyContext(&brw->ctx); - - brw_fini_pipe_control(brw); - brw_batch_free(&brw->batch); - - brw_bo_unreference(brw->throttle_batch[1]); - brw_bo_unreference(brw->throttle_batch[0]); - brw->throttle_batch[1] = NULL; - brw->throttle_batch[0] = NULL; - - /* free the Mesa context */ - _mesa_free_context_data(&brw->ctx, true); - - ralloc_free(brw->mem_ctx); - align_free(brw); - driContextPriv->driverPrivate = NULL; -} - -GLboolean -brw_unbind_context(__DRIcontext *driContextPriv) -{ - struct gl_context *ctx = driContextPriv->driverPrivate; - _mesa_glthread_finish(ctx); - - /* Unset current context and dispath table */ - _mesa_make_current(NULL, NULL, NULL); - - return true; -} - -/** - * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior - * on window system framebuffers. - * - * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if - * your renderbuffer can do sRGB encode, and you can flip a switch that does - * sRGB encode if the renderbuffer can handle it. You can ask specifically - * for a visual where you're guaranteed to be capable, but it turns out that - * everyone just makes all their ARGB8888 visuals capable and doesn't offer - * incapable ones, because there's no difference between the two in resources - * used. Applications thus get built that accidentally rely on the default - * visual choice being sRGB, so we make ours sRGB capable. Everything sounds - * great... - * - * But for GLES2/3, they decided that it was silly to not turn on sRGB encode - * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent. - * So they removed the enable knob and made it "if the renderbuffer is sRGB - * capable, do sRGB encode". Then, for your window system renderbuffers, you - * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals - * and get no sRGB encode (assuming that both kinds of visual are available). - * Thus our choice to support sRGB by default on our visuals for desktop would - * result in broken rendering of GLES apps that aren't expecting sRGB encode. - * - * Unfortunately, renderbuffer setup happens before a context is created. So - * in brw_screen.c we always set up sRGB, and here, if you're a GLES2/3 - * context (without an sRGB visual), we go turn that back off before anyone - * finds out. 
- */ -static void -brw_gles3_srgb_workaround(struct brw_context *brw, struct gl_framebuffer *fb) -{ - struct gl_context *ctx = &brw->ctx; - - if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable) - return; - - for (int i = 0; i < BUFFER_COUNT; i++) { - struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; - - /* Check if sRGB was specifically asked for. */ - struct brw_renderbuffer *irb = brw_get_renderbuffer(fb, i); - if (irb && irb->need_srgb) - return; - - if (rb) - rb->Format = _mesa_get_srgb_format_linear(rb->Format); - } - /* Disable sRGB from framebuffers that are not compatible. */ - fb->Visual.sRGBCapable = false; -} - -GLboolean -brw_make_current(__DRIcontext *driContextPriv, - __DRIdrawable *driDrawPriv, - __DRIdrawable *driReadPriv) -{ - struct brw_context *brw; - - if (driContextPriv) - brw = (struct brw_context *) driContextPriv->driverPrivate; - else - brw = NULL; - - if (driContextPriv) { - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb, *readFb; - - if (driDrawPriv == NULL) { - fb = _mesa_get_incomplete_framebuffer(); - } else { - fb = driDrawPriv->driverPrivate; - driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; - } - - if (driReadPriv == NULL) { - readFb = _mesa_get_incomplete_framebuffer(); - } else { - readFb = driReadPriv->driverPrivate; - driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; - } - - /* The sRGB workaround changes the renderbuffer's format. We must change - * the format before the renderbuffer's miptree gets allocated, otherwise - * the formats of the renderbuffer and its miptree will differ. - */ - brw_gles3_srgb_workaround(brw, fb); - brw_gles3_srgb_workaround(brw, readFb); - - /* If the context viewport hasn't been initialized, force a call out to - * the loader to get buffers so we have a drawable size for the initial - * viewport. */ - if (!brw->ctx.ViewportInitialized) - brw_prepare_render(brw); - - _mesa_make_current(ctx, fb, readFb); - } else { - GET_CURRENT_CONTEXT(ctx); - _mesa_glthread_finish(ctx); - _mesa_make_current(NULL, NULL, NULL); - } - - return true; -} - -void -brw_resolve_for_dri2_flush(struct brw_context *brw, - __DRIdrawable *drawable) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver < 6) { - /* MSAA and fast color clear are not supported, so don't waste time - * checking whether a resolve is needed. - */ - return; - } - - struct gl_framebuffer *fb = drawable->driverPrivate; - struct brw_renderbuffer *rb; - - /* Usually, only the back buffer will need to be downsampled. However, - * the front buffer will also need it if the user has rendered into it. - */ - static const gl_buffer_index buffers[2] = { - BUFFER_BACK_LEFT, - BUFFER_FRONT_LEFT, - }; - - for (int i = 0; i < 2; ++i) { - rb = brw_get_renderbuffer(fb, buffers[i]); - if (rb == NULL || rb->mt == NULL) - continue; - if (rb->mt->surf.samples == 1) { - assert(rb->mt_layer == 0 && rb->mt_level == 0 && - rb->layer_count == 1); - brw_miptree_prepare_external(brw, rb->mt); - } else { - brw_renderbuffer_downsample(brw, rb); - - /* Call prepare_external on the single-sample miptree to do any - * needed resolves prior to handing it off to the window system. - * This is needed in the case that rb->singlesample_mt is Y-tiled - * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In - * this case, the MSAA resolve above will write compressed data into - * rb->singlesample_mt. 
- *
- * TODO: Some day, if we decide to care about the tiny performance
- * hit we're taking by doing the MSAA resolve and then a CCS resolve,
- * we could detect this case and just allocate the single-sampled
- * miptree without aux. However, that would be a lot of plumbing and
- * this is a rather exotic case so it's not really worth it.
- */
-         brw_miptree_prepare_external(brw, rb->singlesample_mt);
-      }
-   }
-}
-
-static unsigned
-brw_bits_per_pixel(const struct brw_renderbuffer *rb)
-{
-   return _mesa_get_format_bytes(brw_rb_format(rb)) * 8;
-}
-
-static void
-brw_query_dri2_buffers(struct brw_context *brw,
-                       __DRIdrawable *drawable,
-                       __DRIbuffer **buffers,
-                       int *count);
-
-static void
-brw_process_dri2_buffer(struct brw_context *brw,
-                        __DRIdrawable *drawable,
-                        __DRIbuffer *buffer,
-                        struct brw_renderbuffer *rb,
-                        const char *buffer_name);
-
-static void
-brw_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
-
-static void
-brw_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
-{
-   struct gl_framebuffer *fb = drawable->driverPrivate;
-   struct brw_renderbuffer *rb;
-   __DRIbuffer *buffers = NULL;
-   int count;
-   const char *region_name;
-
-   /* Set this up front, so that in case our buffers get invalidated
-    * while we're getting new buffers, we don't clobber the stamp and
-    * thus ignore the invalidate. */
-   drawable->lastStamp = drawable->dri2.stamp;
-
-   if (INTEL_DEBUG(DEBUG_DRI))
-      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-
-   brw_query_dri2_buffers(brw, drawable, &buffers, &count);
-
-   if (buffers == NULL)
-      return;
-
-   for (int i = 0; i < count; i++) {
-      switch (buffers[i].attachment) {
-      case __DRI_BUFFER_FRONT_LEFT:
-         rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
-         region_name = "dri2 front buffer";
-         break;
-
-      case __DRI_BUFFER_FAKE_FRONT_LEFT:
-         rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
-         region_name = "dri2 fake front buffer";
-         break;
-
-      case __DRI_BUFFER_BACK_LEFT:
-         rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT);
-         region_name = "dri2 back buffer";
-         break;
-
-      case __DRI_BUFFER_DEPTH:
-      case __DRI_BUFFER_HIZ:
-      case __DRI_BUFFER_DEPTH_STENCIL:
-      case __DRI_BUFFER_STENCIL:
-      case __DRI_BUFFER_ACCUM:
-      default:
-         fprintf(stderr,
-                 "unhandled buffer attach event, attachment type %d\n",
-                 buffers[i].attachment);
-         return;
-      }
-
-      brw_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
-   }
-
-}
-
-void
-brw_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
-{
-   struct brw_context *brw = context->driverPrivate;
-   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
-
-   /* Set this up front, so that in case our buffers get invalidated
-    * while we're getting new buffers, we don't clobber the stamp and
-    * thus ignore the invalidate. */
-   drawable->lastStamp = drawable->dri2.stamp;
-
-   if (INTEL_DEBUG(DEBUG_DRI))
-      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-
-   if (dri_screen->image.loader)
-      brw_update_image_buffers(brw, drawable);
-   else
-      brw_update_dri2_buffers(brw, drawable);
-
-   driUpdateFramebufferSize(&brw->ctx, drawable);
-}
-
-/**
- * brw_prepare_render should be called anywhere that current read/drawbuffer
- * state is required.
- */ -void -brw_prepare_render(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - __DRIcontext *driContext = brw->driContext; - __DRIdrawable *drawable; - - drawable = driContext->driDrawablePriv; - if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { - if (drawable->lastStamp != drawable->dri2.stamp) - brw_update_renderbuffers(driContext, drawable); - driContext->dri2.draw_stamp = drawable->dri2.stamp; - } - - drawable = driContext->driReadablePriv; - if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { - if (drawable->lastStamp != drawable->dri2.stamp) - brw_update_renderbuffers(driContext, drawable); - driContext->dri2.read_stamp = drawable->dri2.stamp; - } - - /* If we're currently rendering to the front buffer, the rendering - * that will happen next will probably dirty the front buffer. So - * mark it as dirty here. - */ - if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) && - ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) { - brw->front_buffer_dirty = true; - } - - if (brw->is_shared_buffer_bound) { - /* Subsequent rendering will probably dirty the shared buffer. */ - brw->is_shared_buffer_dirty = true; - } -} - -/** - * \brief Query DRI2 to obtain a DRIdrawable's buffers. - * - * To determine which DRI buffers to request, examine the renderbuffers - * attached to the drawable's framebuffer. Then request the buffers with - * DRI2GetBuffers() or DRI2GetBuffersWithFormat(). - * - * This is called from brw_update_renderbuffers(). - * - * \param drawable Drawable whose buffers are queried. - * \param buffers [out] List of buffers returned by DRI2 query. - * \param buffer_count [out] Number of buffers returned. - * - * \see brw_update_renderbuffers() - * \see DRI2GetBuffers() - * \see DRI2GetBuffersWithFormat() - */ -static void -brw_query_dri2_buffers(struct brw_context *brw, - __DRIdrawable *drawable, - __DRIbuffer **buffers, - int *buffer_count) -{ - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - struct gl_framebuffer *fb = drawable->driverPrivate; - int i = 0; - unsigned attachments[__DRI_BUFFER_COUNT]; - - struct brw_renderbuffer *front_rb; - struct brw_renderbuffer *back_rb; - - front_rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - back_rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT); - - memset(attachments, 0, sizeof(attachments)); - if ((_mesa_is_front_buffer_drawing(fb) || - _mesa_is_front_buffer_reading(fb) || - !back_rb) && front_rb) { - /* If a fake front buffer is in use, then querying for - * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from - * the real front buffer to the fake front buffer. So before doing the - * query, we need to make sure all the pending drawing has landed in the - * real front buffer. - */ - brw_batch_flush(brw); - brw_flush_front(&brw->ctx); - - attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - attachments[i++] = brw_bits_per_pixel(front_rb); - } else if (front_rb && brw->front_buffer_dirty) { - /* We have pending front buffer rendering, but we aren't querying for a - * front buffer. If the front buffer we have is a fake front buffer, - * the X server is going to throw it away when it processes the query. - * So before doing the query, make sure all the pending drawing has - * landed in the real front buffer. 
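
(For readers tracing the invalidation flow above: a minimal, self-contained sketch of
the stamp protocol, pulled out of the driver for illustration. The field names mirror
the ones used in brw_prepare_render(); the function name is hypothetical.)

    /* The loader bumps drawable->dri2.stamp on each invalidate. The driver
     * keeps two copies of the last stamp it handled, one per drawable
     * (lastStamp) and one per context (draw_stamp/read_stamp), so buffers
     * are re-queried at most once per invalidate even with many contexts.
     */
    static void
    example_check_invalidate(__DRIcontext *ctx, __DRIdrawable *d)
    {
       if (d->dri2.stamp != ctx->dri2.draw_stamp) {
          if (d->lastStamp != d->dri2.stamp)
             brw_update_renderbuffers(ctx, d);
          ctx->dri2.draw_stamp = d->dri2.stamp;
       }
    }
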
-       */
-      brw_batch_flush(brw);
-      brw_flush_front(&brw->ctx);
-   }
-
-   if (back_rb) {
-      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
-      attachments[i++] = brw_bits_per_pixel(back_rb);
-   }
-
-   assert(i <= ARRAY_SIZE(attachments));
-
-   *buffers =
-      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
-                                                    &drawable->w,
-                                                    &drawable->h,
-                                                    attachments, i / 2,
-                                                    buffer_count,
-                                                    drawable->loaderPrivate);
-}
-
-/**
- * \brief Assign a DRI buffer's DRM region to a renderbuffer.
- *
- * This is called from brw_update_renderbuffers().
- *
- * \par Note:
- *    DRI buffers whose attachment point is DRI2BufferStencil or
- *    DRI2BufferDepthStencil are handled as special cases.
- *
- * \param buffer_name is a human-readable name, such as "dri2 front buffer",
- *        that is passed to brw_bo_gem_create_from_name().
- *
- * \see brw_update_renderbuffers()
- */
-static void
-brw_process_dri2_buffer(struct brw_context *brw,
-                        __DRIdrawable *drawable,
-                        __DRIbuffer *buffer,
-                        struct brw_renderbuffer *rb,
-                        const char *buffer_name)
-{
-   struct gl_framebuffer *fb = drawable->driverPrivate;
-   struct brw_bo *bo;
-
-   if (!rb)
-      return;
-
-   unsigned num_samples = rb->Base.Base.NumSamples;
-
-   /* We try to avoid closing and reopening the same BO name, because the first
-    * use of a mapping of the buffer involves a bunch of page faulting which is
-    * moderately expensive.
-    */
-   struct brw_mipmap_tree *last_mt;
-   if (num_samples == 0)
-      last_mt = rb->mt;
-   else
-      last_mt = rb->singlesample_mt;
-
-   uint32_t old_name = 0;
-   if (last_mt) {
-      /* The bo already has a name because the miptree was created by a
-       * previous call to brw_process_dri2_buffer(). If a bo already has a
-       * name, then brw_bo_flink() is a low-cost getter. It does not
-       * create a new name.
-       */
-      brw_bo_flink(last_mt->bo, &old_name);
-   }
-
-   if (old_name == buffer->name)
-      return;
-
-   if (INTEL_DEBUG(DEBUG_DRI)) {
-      fprintf(stderr,
-              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
-              buffer->name, buffer->attachment,
-              buffer->cpp, buffer->pitch);
-   }
-
-   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
-                                    buffer->name);
-   if (!bo) {
-      fprintf(stderr,
-              "Failed to open BO for returned DRI2 buffer "
-              "(%dx%d, %s, named %d).\n"
-              "This is likely a bug in the X Server that will lead to a "
-              "crash soon.\n",
-              drawable->w, drawable->h, buffer_name, buffer->name);
-      return;
-   }
-
-   uint32_t tiling, swizzle;
-   brw_bo_get_tiling(bo, &tiling, &swizzle);
-
-   struct brw_mipmap_tree *mt =
-      brw_miptree_create_for_bo(brw,
-                                bo,
-                                brw_rb_format(rb),
-                                0,
-                                drawable->w,
-                                drawable->h,
-                                1,
-                                buffer->pitch,
-                                isl_tiling_from_i915_tiling(tiling),
-                                MIPTREE_CREATE_DEFAULT);
-   if (!mt) {
-      brw_bo_unreference(bo);
-      return;
-   }
-
-   /* We got this BO from X11. We can't assume that we have coherent texture
-    * access because X may suddenly decide to use it for scan-out which would
-    * destroy coherency.
-    */
-   bo->cache_coherent = false;
-
-   if (!brw_update_winsys_renderbuffer_miptree(brw, rb, mt,
-                                               drawable->w, drawable->h,
-                                               buffer->pitch)) {
-      brw_bo_unreference(bo);
-      brw_miptree_release(&mt);
-      return;
-   }
-
-   if (_mesa_is_front_buffer_drawing(fb) &&
-       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
-        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
-       rb->Base.Base.NumSamples > 1) {
-      brw_renderbuffer_upsample(brw, rb);
-   }
-
-   assert(rb->mt);
-
-   brw_bo_unreference(bo);
-}
-
-/**
- * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
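
(To make the attachment-list encoding used by getBuffersWithFormat() above concrete:
the array interleaves (attachment, bits-per-pixel) pairs, and the count handed to the
loader is the number of pairs, not the number of array entries. A hypothetical
double-buffered ARGB8888 drawable with no front-buffer access would build:)

    unsigned attachments[] = {
       __DRI_BUFFER_BACK_LEFT, 32,   /* one (attachment, bpp) pair */
    };
    /* i == 2 here, so the loader is asked for i / 2 == 1 buffer. */
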
- *
- * To determine which DRI buffers to request, examine the renderbuffers
- * attached to the drawable's framebuffer. Then request the buffers from
- * the image loader.
- *
- * This is called from brw_update_renderbuffers().
- *
- * \param drawable      Drawable whose buffers are queried.
- * \param buffers       [out] List of buffers returned by DRI2 query.
- * \param buffer_count  [out] Number of buffers returned.
- *
- * \see brw_update_renderbuffers()
- */
-
-static void
-brw_update_image_buffer(struct brw_context *intel,
-                        __DRIdrawable *drawable,
-                        struct brw_renderbuffer *rb,
-                        __DRIimage *buffer,
-                        enum __DRIimageBufferMask buffer_type)
-{
-   struct gl_framebuffer *fb = drawable->driverPrivate;
-
-   if (!rb || !buffer->bo)
-      return;
-
-   unsigned num_samples = rb->Base.Base.NumSamples;
-
-   /* Check and see if we're already bound to the right
-    * buffer object
-    */
-   struct brw_mipmap_tree *last_mt;
-   if (num_samples == 0)
-      last_mt = rb->mt;
-   else
-      last_mt = rb->singlesample_mt;
-
-   if (last_mt && last_mt->bo == buffer->bo) {
-      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
-         brw_miptree_make_shareable(intel, last_mt);
-      }
-      return;
-   }
-
-   /* Only allow internal compression if samples == 0. For multisampled
-    * window system buffers, the only thing the single-sampled buffer is used
-    * for is as a resolve target. If we do any compression beyond what is
-    * supported by the window system, we will just have to resolve so it's
-    * probably better to just not bother.
-    */
-   const bool allow_internal_aux = (num_samples == 0);
-
-   struct brw_mipmap_tree *mt =
-      brw_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
-                                       brw_rb_format(rb),
-                                       allow_internal_aux);
-   if (!mt)
-      return;
-
-   if (!brw_update_winsys_renderbuffer_miptree(intel, rb, mt,
-                                               buffer->width, buffer->height,
-                                               buffer->pitch)) {
-      brw_miptree_release(&mt);
-      return;
-   }
-
-   if (_mesa_is_front_buffer_drawing(fb) &&
-       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
-       rb->Base.Base.NumSamples > 1) {
-      brw_renderbuffer_upsample(intel, rb);
-   }
-
-   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
-      /* The compositor and the application may access this image
-       * concurrently. The display hardware may even scanout the image while
-       * the GPU is rendering to it. Aux surfaces cause difficulty with
-       * concurrent access, so permanently disable aux for this miptree.
-       *
-       * Perhaps we could improve overall application performance by
-       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
-       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
-       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
-       * approach to be highly dependent on the application's GL usage.
-       *
-       * I [chadv] expect clever disabling/reenabling to be counterproductive
-       * in the use cases I care about: applications that render nearly
-       * realtime handwriting to the surface while possibly undergoing
-       * simultaneous scanout as a display plane. The app requires low
-       * render latency. Even though the app spends most of its time in
-       * shared-buffer mode, it also frequently transitions between
-       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
-       * mode. Visual stutter during the transitions should be avoided.
-       *
-       * In this case, I [chadv] believe reducing the GPU workload at
-       * shared-buffer/double-buffer transitions would offer a smoother app
-       * experience than any savings due to aux compression. But I've
-       * collected no data to prove my theory.
- */ - brw_miptree_make_shareable(intel, mt); - } -} - -static void -brw_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) -{ - struct gl_framebuffer *fb = drawable->driverPrivate; - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - struct brw_renderbuffer *front_rb; - struct brw_renderbuffer *back_rb; - struct __DRIimageList images; - mesa_format format; - uint32_t buffer_mask = 0; - int ret; - - front_rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - back_rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT); - - if (back_rb) - format = brw_rb_format(back_rb); - else if (front_rb) - format = brw_rb_format(front_rb); - else - return; - - if (front_rb && (_mesa_is_front_buffer_drawing(fb) || - _mesa_is_front_buffer_reading(fb) || !back_rb)) { - buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; - } - - if (back_rb) - buffer_mask |= __DRI_IMAGE_BUFFER_BACK; - - ret = dri_screen->image.loader->getBuffers(drawable, - driGLFormatToImageFormat(format), - &drawable->dri2.stamp, - drawable->loaderPrivate, - buffer_mask, - &images); - if (!ret) - return; - - if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) { - drawable->w = images.front->width; - drawable->h = images.front->height; - brw_update_image_buffer(brw, drawable, front_rb, images.front, - __DRI_IMAGE_BUFFER_FRONT); - } - - if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) { - drawable->w = images.back->width; - drawable->h = images.back->height; - brw_update_image_buffer(brw, drawable, back_rb, images.back, - __DRI_IMAGE_BUFFER_BACK); - } - - if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) { - assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED); - drawable->w = images.back->width; - drawable->h = images.back->height; - brw_update_image_buffer(brw, drawable, back_rb, images.back, - __DRI_IMAGE_BUFFER_SHARED); - brw->is_shared_buffer_bound = true; - } else { - brw->is_shared_buffer_bound = false; - brw->is_shared_buffer_dirty = false; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h deleted file mode 100644 index 2061fb2..0000000 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ /dev/null @@ -1,1637 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keithw@vmware.com>
-  */
-
-
-#ifndef BRWCONTEXT_INC
-#define BRWCONTEXT_INC
-
-#include <stdbool.h>
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/errors.h"
-#include "brw_structs.h"
-#include "brw_pipe_control.h"
-#include "compiler/brw_compiler.h"
-
-#include "isl/isl.h"
-#include "blorp/blorp.h"
-
-#include <brw_bufmgr.h>
-
-#include "dev/intel_debug.h"
-#include "common/intel_decoder.h"
-#include "brw_screen.h"
-#include "brw_tex_obj.h"
-#include "perf/intel_perf.h"
-#include "perf/intel_perf_query.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/* Glossary:
- *
- * URB - uniform resource buffer. A mid-sized buffer which is
- * partitioned between the fixed function units and used for passing
- * values (vertices, primitives, constants) between them.
- *
- * CURBE - constant URB entry. An urb region (entry) used to hold
- * constant values which the fixed function units can be instructed to
- * preload into the GRF when spawning a thread.
- *
- * VUE - vertex URB entry. An urb entry holding a vertex and usually
- * a vertex header. The header contains control information and
- * things like primitive type, Begin/end flags and clip codes.
- *
- * PUE - primitive URB entry. An urb entry produced by the setup (SF)
- * unit holding rasterization and interpolation parameters.
- *
- * GRF - general register file. One of several register files
- * addressable by programmed threads. The inputs (r0, payload, curbe,
- * urb) of the thread are preloaded to this area before the thread is
- * spawned. The registers are individually 8 dwords wide and suitable
- * for general usage. Registers holding thread input values are not
- * special and may be overwritten.
- *
- * MRF - message register file. Threads communicate (and terminate)
- * by sending messages. Message parameters are placed in contiguous
- * MRF registers. All program output is via these messages. URB
- * entries are populated by sending a message to the shared URB
- * function containing the new data, together with a control word,
- * often an unmodified copy of R0.
- *
- * R0 - GRF register 0. Typically holds control information used when
- * sending messages to other threads.
- *
- * EU or GFX4 EU: The name of the programmable subsystem of the
- * i965 hardware. Threads are executed by the EU, the registers
- * described above are part of the EU architecture.
- *
- * Fixed function units:
- *
- * CS - Command streamer. Notional first unit, little software
- * interaction. Holds the URB entries used for constant data, i.e. the
- * CURBEs.
- *
- * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
- * this unit is responsible for pulling vertices out of vertex buffers
- * in vram and injecting them into the processing pipe as VUEs. If
- * enabled, it first passes them to a VS thread which is a good place
- * for the driver to implement any active vertex shader.
- *
- * HS - Hull Shader (Tessellation Control Shader)
- *
- * TE - Tessellation Engine (Tessellation Primitive Generation)
- *
- * DS - Domain Shader (Tessellation Evaluation Shader)
- *
- * GS - Geometry Shader. This corresponds to a new DX10 concept. If
- * enabled, incoming strips etc are passed to GS threads in individual
- * line/triangle/point units. The GS thread may perform arbitrary
- * computation and emit whatever primitives with whatever vertices it
- * chooses.
- * This makes GS an excellent place to implement GL's
- * unfilled polygon modes, though of course it is capable of much
- * more. Additionally, GS is used to translate away primitives not
- * handled by later units, including Quads and Lineloops.
- *
- * CS - Clipper. Mesa's clipping algorithms are imported to run on
- * this unit. The fixed function part performs clip testing against
- * the 6 fixed clipplanes and makes decisions on whether or not the
- * incoming primitive needs to be passed to a thread for clipping.
- * User clip planes are handled via cooperation with the VS thread.
- *
- * SF - Strips Fans or Setup: Triangles are prepared for
- * rasterization. Interpolation coefficients are calculated.
- * Flatshading and two-sided lighting usually performed here.
- *
- * WM - Windower. Interpolation of vertex attributes performed here.
- * Fragment shader implemented here. SIMD aspects of EU taken full
- * advantage of, as pixels are processed in blocks of 16.
- *
- * CC - Color Calculator. No EU threads associated with this unit.
- * Handles blending and (presumably) depth and stencil testing.
- */
-
-struct brw_context;
-struct brw_inst;
-struct brw_vs_prog_key;
-struct brw_vue_prog_key;
-struct brw_wm_prog_key;
-struct brw_wm_prog_data;
-struct brw_cs_prog_key;
-struct brw_cs_prog_data;
-struct brw_label;
-
-enum brw_pipeline {
-   BRW_RENDER_PIPELINE,
-   BRW_COMPUTE_PIPELINE,
-
-   BRW_NUM_PIPELINES
-};
-
-enum brw_cache_id {
-   BRW_CACHE_FS_PROG,
-   BRW_CACHE_BLORP_PROG,
-   BRW_CACHE_SF_PROG,
-   BRW_CACHE_VS_PROG,
-   BRW_CACHE_FF_GS_PROG,
-   BRW_CACHE_GS_PROG,
-   BRW_CACHE_TCS_PROG,
-   BRW_CACHE_TES_PROG,
-   BRW_CACHE_CLIP_PROG,
-   BRW_CACHE_CS_PROG,
-
-   BRW_MAX_CACHE
-};
-
-enum gfx9_astc5x5_wa_tex_type {
-   GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
-   GFX9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1,
-};
-
-enum brw_state_id {
-   /* brw_cache_ids must come first - see brw_program_cache.c */
-   BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
-   BRW_STATE_FRAGMENT_PROGRAM,
-   BRW_STATE_GEOMETRY_PROGRAM,
-   BRW_STATE_TESS_PROGRAMS,
-   BRW_STATE_VERTEX_PROGRAM,
-   BRW_STATE_REDUCED_PRIMITIVE,
-   BRW_STATE_PATCH_PRIMITIVE,
-   BRW_STATE_PRIMITIVE,
-   BRW_STATE_CONTEXT,
-   BRW_STATE_PSP,
-   BRW_STATE_SURFACES,
-   BRW_STATE_BINDING_TABLE_POINTERS,
-   BRW_STATE_INDICES,
-   BRW_STATE_VERTICES,
-   BRW_STATE_DEFAULT_TESS_LEVELS,
-   BRW_STATE_BATCH,
-   BRW_STATE_INDEX_BUFFER,
-   BRW_STATE_VS_CONSTBUF,
-   BRW_STATE_TCS_CONSTBUF,
-   BRW_STATE_TES_CONSTBUF,
-   BRW_STATE_GS_CONSTBUF,
-   BRW_STATE_PROGRAM_CACHE,
-   BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_VUE_MAP_GEOM_OUT,
-   BRW_STATE_TRANSFORM_FEEDBACK,
-   BRW_STATE_RASTERIZER_DISCARD,
-   BRW_STATE_STATS_WM,
-   BRW_STATE_UNIFORM_BUFFER,
-   BRW_STATE_IMAGE_UNITS,
-   BRW_STATE_META_IN_PROGRESS,
-   BRW_STATE_PUSH_CONSTANT_ALLOCATION,
-   BRW_STATE_NUM_SAMPLES,
-   BRW_STATE_TEXTURE_BUFFER,
-   BRW_STATE_GFX4_UNIT_STATE,
-   BRW_STATE_CC_VP,
-   BRW_STATE_SF_VP,
-   BRW_STATE_CLIP_VP,
-   BRW_STATE_SAMPLER_STATE_TABLE,
-   BRW_STATE_VS_ATTRIB_WORKAROUNDS,
-   BRW_STATE_COMPUTE_PROGRAM,
-   BRW_STATE_CS_WORK_GROUPS,
-   BRW_STATE_URB_SIZE,
-   BRW_STATE_CC_STATE,
-   BRW_STATE_BLORP,
-   BRW_STATE_VIEWPORT_COUNT,
-   BRW_STATE_CONSERVATIVE_RASTERIZATION,
-   BRW_STATE_DRAW_CALL,
-   BRW_STATE_AUX,
-   BRW_NUM_STATE_BITS
-};
-
-/**
- * BRW_NEW_*_PROG_DATA and BRW_NEW_*_PROGRAM are similar, but distinct.
- *
- * BRW_NEW_*_PROGRAM relates to the gl_shader_program/gl_program structures.
- * When the currently bound shader program differs from the previous draw
- * call, these will be flagged.
- * They cover brw->{stage}_program and ctx->{Stage}Program->_Current.
- *
- * BRW_NEW_*_PROG_DATA is flagged when the effective shaders change, from a
- * driver perspective. Even if the same shader is bound at the API level,
- * we may need to switch between multiple versions of that shader to handle
- * changes in non-orthogonal state.
- *
- * Additionally, multiple shader programs may have identical vertex shaders
- * (for example), or compile down to the same code in the backend. We combine
- * those into a single program cache entry.
- *
- * BRW_NEW_*_PROG_DATA occurs when switching program cache entries, which
- * covers the brw_*_prog_data structures, and brw->*.prog_offset.
- */
-#define BRW_NEW_FS_PROG_DATA (1ull << BRW_CACHE_FS_PROG)
-/* XXX: The BRW_NEW_BLORP_BLIT_PROG_DATA dirty bit is unused (as BLORP doesn't
- * use the normal state upload paths), but the cache is still used. To avoid
- * polluting the brw_program_cache code with special cases, we retain the
- * dirty bit for now. It should eventually be removed.
- */
-#define BRW_NEW_BLORP_BLIT_PROG_DATA (1ull << BRW_CACHE_BLORP_PROG)
-#define BRW_NEW_SF_PROG_DATA (1ull << BRW_CACHE_SF_PROG)
-#define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG)
-#define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG)
-#define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG)
-#define BRW_NEW_TCS_PROG_DATA (1ull << BRW_CACHE_TCS_PROG)
-#define BRW_NEW_TES_PROG_DATA (1ull << BRW_CACHE_TES_PROG)
-#define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG)
-#define BRW_NEW_CS_PROG_DATA (1ull << BRW_CACHE_CS_PROG)
-#define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE)
-#define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM)
-#define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM)
-#define BRW_NEW_TESS_PROGRAMS (1ull << BRW_STATE_TESS_PROGRAMS)
-#define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM)
-#define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE)
-#define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
-#define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE)
-#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT)
-#define BRW_NEW_PSP (1ull << BRW_STATE_PSP)
-#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
-#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES)
-#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES)
-#define BRW_NEW_DEFAULT_TESS_LEVELS (1ull << BRW_STATE_DEFAULT_TESS_LEVELS)
-/**
- * Used for any batch entry with a relocated pointer that will be used
- * by any 3D rendering.
- */ -#define BRW_NEW_BATCH (1ull << BRW_STATE_BATCH) -/** \see brw.state.depth_region */ -#define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER) -#define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF) -#define BRW_NEW_TCS_CONSTBUF (1ull << BRW_STATE_TCS_CONSTBUF) -#define BRW_NEW_TES_CONSTBUF (1ull << BRW_STATE_TES_CONSTBUF) -#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF) -#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE) -#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS) -#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT) -#define BRW_NEW_VIEWPORT_COUNT (1ull << BRW_STATE_VIEWPORT_COUNT) -#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK) -#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD) -#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM) -#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER) -#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS) -#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS) -#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION) -#define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES) -#define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER) -#define BRW_NEW_GFX4_UNIT_STATE (1ull << BRW_STATE_GFX4_UNIT_STATE) -#define BRW_NEW_CC_VP (1ull << BRW_STATE_CC_VP) -#define BRW_NEW_SF_VP (1ull << BRW_STATE_SF_VP) -#define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP) -#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE) -#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) -#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM) -#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS) -#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE) -#define BRW_NEW_CC_STATE (1ull << BRW_STATE_CC_STATE) -#define BRW_NEW_BLORP (1ull << BRW_STATE_BLORP) -#define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << BRW_STATE_CONSERVATIVE_RASTERIZATION) -#define BRW_NEW_DRAW_CALL (1ull << BRW_STATE_DRAW_CALL) -#define BRW_NEW_AUX_STATE (1ull << BRW_STATE_AUX) - -struct brw_state_flags { - /** State update flags signalled by mesa internals */ - GLuint mesa; - /** - * State update flags signalled as the result of brw_tracked_state updates - */ - uint64_t brw; -}; - - -/** Subclass of Mesa program */ -struct brw_program { - struct gl_program program; - GLuint id; - - bool compiled_once; -}; - -/** Number of texture sampler units */ -#define BRW_MAX_TEX_UNIT 32 - -/** Max number of UBOs in a shader */ -#define BRW_MAX_UBO 14 - -/** Max number of SSBOs in a shader */ -#define BRW_MAX_SSBO 12 - -/** Max number of atomic counter buffer objects in a shader */ -#define BRW_MAX_ABO 16 - -/** Max number of image uniforms in a shader */ -#define BRW_MAX_IMAGES 32 - -/** Maximum number of actual buffers used for stream output */ -#define BRW_MAX_SOL_BUFFERS 4 - -#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \ - BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \ - BRW_MAX_UBO + \ - BRW_MAX_SSBO + \ - BRW_MAX_ABO + \ - BRW_MAX_IMAGES + \ - 2 + /* shader time, pull constants */ \ - 1 /* cs num work groups */) - -struct brw_cache { - struct brw_context *brw; - - struct brw_cache_item **items; - struct brw_bo *bo; - void *map; - GLuint size, n_items; - - uint32_t next_offset; -}; - -#define perf_debug(...) 
do { \ - static GLuint msg_id = 0; \ - if (INTEL_DEBUG(DEBUG_PERF)) \ - dbg_printf(__VA_ARGS__); \ - if (brw->perf_debug) \ - _mesa_gl_debugf(&brw->ctx, &msg_id, \ - MESA_DEBUG_SOURCE_API, \ - MESA_DEBUG_TYPE_PERFORMANCE, \ - MESA_DEBUG_SEVERITY_MEDIUM, \ - __VA_ARGS__); \ -} while(0) - -#define WARN_ONCE(cond, fmt...) do { \ - if (unlikely(cond)) { \ - static bool _warned = false; \ - static GLuint msg_id = 0; \ - if (!_warned) { \ - fprintf(stderr, "WARNING: "); \ - fprintf(stderr, fmt); \ - _warned = true; \ - \ - _mesa_gl_debugf(ctx, &msg_id, \ - MESA_DEBUG_SOURCE_API, \ - MESA_DEBUG_TYPE_OTHER, \ - MESA_DEBUG_SEVERITY_HIGH, fmt); \ - } \ - } \ -} while (0) - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*emit)( struct brw_context *brw ); -}; - -enum shader_time_shader_type { - ST_NONE, - ST_VS, - ST_TCS, - ST_TES, - ST_GS, - ST_FS8, - ST_FS16, - ST_FS32, - ST_CS, -}; - -struct brw_vertex_buffer { - /** Buffer object containing the uploaded vertex data */ - struct brw_bo *bo; - uint32_t offset; - uint32_t size; - /** Byte stride between elements in the uploaded array */ - GLuint stride; - GLuint step_rate; -}; -struct brw_vertex_element { - const struct gl_vertex_format *glformat; - - int buffer; - bool is_dual_slot; - /** Offset of the first element within the buffer object */ - unsigned int offset; -}; - -struct brw_query_object { - struct gl_query_object Base; - - /** Last query BO associated with this query. */ - struct brw_bo *bo; - - /** Last index in bo with query data for this object. */ - int last_index; - - /** True if we know the batch has been flushed since we ended the query. */ - bool flushed; -}; - -struct brw_reloc_list { - struct drm_i915_gem_relocation_entry *relocs; - int reloc_count; - int reloc_array_size; -}; - -struct brw_growing_bo { - struct brw_bo *bo; - uint32_t *map; - struct brw_bo *partial_bo; - uint32_t *partial_bo_map; - unsigned partial_bytes; - enum brw_memory_zone memzone; -}; - -struct brw_batch { - /** Current batchbuffer being queued up. */ - struct brw_growing_bo batch; - /** Current statebuffer being queued up. */ - struct brw_growing_bo state; - - /** Last batchbuffer submitted to the hardware. Used for glFinish(). 
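
(A minimal sketch of how struct brw_tracked_state above is used in practice: a state
atom pairs the dirty bits it depends on with an emit callback, and the state upload
loop invokes the callback only when one of those bits is raised. The names below are
illustrative, not from this file.)

    static void
    example_emit(struct brw_context *brw)
    {
       /* ... emit the hardware packets this atom owns ... */
    }

    static const struct brw_tracked_state example_atom = {
       .dirty = {
          .mesa = _NEW_COLOR,                           /* core Mesa flags */
          .brw = BRW_NEW_BATCH | BRW_NEW_FS_PROG_DATA,  /* driver flags */
       },
       .emit = example_emit,
    };
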
*/ - struct brw_bo *last_bo; - -#ifdef DEBUG - uint16_t emit, total; -#endif - uint32_t *map_next; - uint32_t state_used; - - bool use_shadow_copy; - bool use_batch_first; - bool needs_sol_reset; - bool state_base_address_emitted; - bool no_wrap; - bool contains_fence_signal; - - struct brw_reloc_list batch_relocs; - struct brw_reloc_list state_relocs; - unsigned int valid_reloc_flags; - - /** The validation list */ - struct drm_i915_gem_exec_object2 *validation_list; - struct brw_bo **exec_bos; - int exec_count; - int exec_array_size; - - /** The amount of aperture space (in bytes) used by all exec_bos */ - uint64_t aperture_space; - - struct { - uint32_t *map_next; - int batch_reloc_count; - int state_reloc_count; - int exec_count; - } saved; - - /** Map from batch offset to brw_state_batch data (with DEBUG_BATCH) */ - struct hash_table_u64 *state_batch_sizes; - - struct intel_batch_decode_ctx decoder; - - /** A list of drm_i915_exec_fences to have execbuf signal or wait on */ - struct util_dynarray exec_fences; -}; - -#define BRW_MAX_XFB_STREAMS 4 - -struct brw_transform_feedback_counter { - /** - * Index of the first entry of this counter within the primitive count BO. - * An entry is considered to be an N-tuple of 64bit values, where N is the - * number of vertex streams supported by the platform. - */ - unsigned bo_start; - - /** - * Index one past the last entry of this counter within the primitive - * count BO. - */ - unsigned bo_end; - - /** - * Primitive count values accumulated while this counter was active, - * excluding any entries buffered between \c bo_start and \c bo_end, which - * haven't been accounted for yet. - */ - uint64_t accum[BRW_MAX_XFB_STREAMS]; -}; - -static inline void -brw_reset_transform_feedback_counter( - struct brw_transform_feedback_counter *counter) -{ - counter->bo_start = counter->bo_end; - memset(&counter->accum, 0, sizeof(counter->accum)); -} - -struct brw_transform_feedback_object { - struct gl_transform_feedback_object base; - - /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */ - struct brw_bo *offset_bo; - - /** If true, SO_WRITE_OFFSET(n) should be reset to zero at next use. */ - bool zero_offsets; - - /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */ - GLenum primitive_mode; - - /** - * The maximum number of vertices that we can write without overflowing - * any of the buffers currently being used for transform feedback. - */ - unsigned max_index; - - struct brw_bo *prim_count_bo; - - /** - * Count of primitives generated during this transform feedback operation. - */ - struct brw_transform_feedback_counter counter; - - /** - * Count of primitives generated during the previous transform feedback - * operation. Used to implement DrawTransformFeedback(). - */ - struct brw_transform_feedback_counter previous_counter; - - /** - * Number of vertices written between last Begin/EndTransformFeedback(). - * - * Used to implement DrawTransformFeedback(). - */ - uint64_t vertices_written[BRW_MAX_XFB_STREAMS]; - bool vertices_written_valid; -}; - -/** - * Data shared between each programmable stage in the pipeline (vs, gs, and - * wm). - */ -struct brw_stage_state -{ - gl_shader_stage stage; - struct brw_stage_prog_data *prog_data; - - /** - * Optional scratch buffer used to store spilled register values and - * variably-indexed GRF arrays. - * - * The contents of this buffer are short-lived so the same memory can be - * re-used at will for multiple shader programs (executed by the same fixed - * function). 
However reusing a scratch BO for which shader invocations - * are still in flight with a per-thread scratch slot size other than the - * original can cause threads with different scratch slot size and FFTID - * (which may be executed in parallel depending on the shader stage and - * hardware generation) to map to an overlapping region of the scratch - * space, which can potentially lead to mutual scratch space corruption. - * For that reason if you borrow this scratch buffer you should only be - * using the slot size given by the \c per_thread_scratch member below, - * unless you're taking additional measures to synchronize thread execution - * across slot size changes. - */ - struct brw_bo *scratch_bo; - - /** - * Scratch slot size allocated for each thread in the buffer object given - * by \c scratch_bo. - */ - uint32_t per_thread_scratch; - - /** Offset in the program cache to the program */ - uint32_t prog_offset; - - /** Offset in the batchbuffer to Gfx4-5 pipelined state (VS/WM/GS_STATE). */ - uint32_t state_offset; - - struct brw_bo *push_const_bo; /* NULL if using the batchbuffer */ - uint32_t push_const_offset; /* Offset in the push constant BO or batch */ - int push_const_size; /* in 256-bit register increments */ - - /* Binding table: pointers to SURFACE_STATE entries. */ - uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_MAX_SURFACES]; - - /** SAMPLER_STATE count and table offset */ - uint32_t sampler_count; - uint32_t sampler_offset; - - struct brw_image_param image_param[BRW_MAX_IMAGES]; - - /** Need to re-emit 3DSTATE_CONSTANT_XS? */ - bool push_constants_dirty; -}; - -enum brw_predicate_state { - /* The first two states are used if we can determine whether to draw - * without having to look at the values in the query object buffer. This - * will happen if there is no conditional render in progress, if the query - * object is already completed or if something else has already added - * samples to the preliminary result such as via a BLT command. - */ - BRW_PREDICATE_STATE_RENDER, - BRW_PREDICATE_STATE_DONT_RENDER, - /* In this case whether to draw or not depends on the result of an - * MI_PREDICATE command so the predicate enable bit needs to be checked. - */ - BRW_PREDICATE_STATE_USE_BIT, - /* In this case, either MI_PREDICATE doesn't exist or we lack the - * necessary kernel features to use it. Stall for the query result. - */ - BRW_PREDICATE_STATE_STALL_FOR_QUERY, -}; - -struct shader_times; - -struct intel_l3_config; -struct intel_perf; - -struct brw_uploader { - struct brw_bufmgr *bufmgr; - struct brw_bo *bo; - void *map; - uint32_t next_offset; - unsigned default_size; -}; - -/** - * brw_context is derived from gl_context. - */ -struct brw_context -{ - struct gl_context ctx; /**< base class, must be first field */ - - struct - { - /** - * Emit an MI_REPORT_PERF_COUNT command packet. - * - * This asks the GPU to write a report of the current OA counter values - * into @bo at the given offset and containing the given @report_id - * which we can cross-reference when parsing the report (gfx7+ only). 
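
(The scratch-space rules described a few lines up imply a grow-only discipline: never
shrink per_thread_scratch while older invocations may still be in flight. A sketch of
the call a stage's state upload might make, using the brw_alloc_stage_scratch() helper
declared later in this header; the surrounding condition is illustrative, and the
helper is expected to early-out when the current allocation already suffices.)

    if (prog_data->total_scratch > stage_state->per_thread_scratch)
       brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);
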
- */ - void (*emit_mi_report_perf_count)(struct brw_context *brw, - struct brw_bo *bo, - uint32_t offset_in_bytes, - uint32_t report_id); - - void (*emit_compute_walker)(struct brw_context *brw); - void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); - void (*emit_state_base_address)(struct brw_context *brw); - } vtbl; - - struct brw_bufmgr *bufmgr; - - uint32_t hw_ctx; - - /** - * BO for post-sync nonzero writes for gfx6 workaround. - * - * This buffer also contains a marker + description of the driver. This - * buffer is added to all execbufs syscalls so that we can identify the - * driver that generated a hang by looking at the content of the buffer in - * the error state. - * - * Read/write should go at workaround_bo_offset in that buffer to avoid - * overriding the debug data. - */ - struct brw_bo *workaround_bo; - uint32_t workaround_bo_offset; - uint8_t pipe_controls_since_last_cs_stall; - - /** - * Set of struct brw_bo * that have been rendered to within this batchbuffer - * and would need flushing before being used from another cache domain that - * isn't coherent with it (i.e. the sampler). - */ - struct hash_table *render_cache; - - /** - * Set of struct brw_bo * that have been used as a depth buffer within this - * batchbuffer and would need flushing before being used from another cache - * domain that isn't coherent with it (i.e. the sampler). - */ - struct set *depth_cache; - - /** - * Number of resets observed in the system at context creation. - * - * This is tracked in the context so that we can determine that another - * reset has occurred. - */ - uint32_t reset_count; - - struct brw_batch batch; - - struct brw_uploader upload; - - /** - * Set if rendering has occurred to the drawable's front buffer. - * - * This is used in the DRI2 case to detect that glFlush should also copy - * the contents of the fake front buffer to the real front buffer. - */ - bool front_buffer_dirty; - - /** - * True if the __DRIdrawable's current __DRIimageBufferMask is - * __DRI_IMAGE_BUFFER_SHARED. - */ - bool is_shared_buffer_bound; - - /** - * True if a shared buffer is bound and it has received any rendering since - * the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer(). - */ - bool is_shared_buffer_dirty; - - /** Framerate throttling: @{ */ - struct brw_bo *throttle_batch[2]; - - /* Limit the number of outstanding SwapBuffers by waiting for an earlier - * frame of rendering to complete. This gives a very precise cap to the - * latency between input and output such that rendering never gets more - * than a frame behind the user. (With the caveat that we technically are - * not using the SwapBuffers itself as a barrier but the first batch - * submitted afterwards, which may be immediately prior to the next - * SwapBuffers.) - */ - bool need_swap_throttle; - - /** General throttling, not caught by throttling between SwapBuffers */ - bool need_flush_throttle; - /** @} */ - - GLuint stats_wm; - - /** - * drirc options: - * @{ - */ - bool always_flush_batch; - bool always_flush_cache; - bool disable_throttling; - bool precompile; - bool dual_color_blend_by_location; - /** @} */ - - GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ - - bool object_preemption; /**< Object level preemption enabled. 
 */
-
-   GLenum reduced_primitive;
-
-   /**
-    * Set if we're either a debug context or the INTEL_DEBUG=perf environment
-    * variable is set; this is the flag indicating to do expensive work that
-    * might lead to a perf_debug() call.
-    */
-   bool perf_debug;
-
-   uint64_t max_gtt_map_object_size;
-
-   bool has_hiz;
-   bool has_separate_stencil;
-
-   bool can_push_ubos;
-
-   /** Derived stencil states. */
-   bool stencil_enabled;
-   bool stencil_two_sided;
-   bool stencil_write_enabled;
-   /** Derived polygon state. */
-   bool polygon_front_bit; /**< 0=GL_CCW, 1=GL_CW */
-
-   struct isl_device isl_dev;
-
-   struct blorp_context blorp;
-
-   GLuint NewGLState;
-   struct {
-      struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
-   } state;
-
-   enum brw_pipeline last_pipeline;
-
-   struct brw_cache cache;
-
-   /* Whether a meta-operation is in progress. */
-   bool meta_in_progress;
-
-   /* Whether the last depth/stencil packets were both NULL. */
-   bool no_depth_or_stencil;
-
-   /* The last PMA stall bits programmed. */
-   uint32_t pma_stall_bits;
-
-   /* Whether INTEL_black_render is active. */
-   bool frontend_noop;
-
-   struct {
-      struct {
-         /**
-          * Either the value of gl_BaseVertex for indexed draw calls or the
-          * value of the argument for non-indexed draw calls for the
-          * current _mesa_prim.
-          */
-         int firstvertex;
-
-         /** The value of gl_BaseInstance for the current _mesa_prim. */
-         int gl_baseinstance;
-      } params;
-
-      /**
-       * Buffer and offset used for GL_ARB_shader_draw_parameters which will
-       * point to the indirect buffer for indirect draw calls.
-       */
-      struct brw_bo *draw_params_bo;
-      uint32_t draw_params_offset;
-
-      struct {
-         /**
-          * The value of gl_DrawID for the current _mesa_prim. This always comes
-          * in from its own vertex buffer since it's not part of the indirect
-          * draw parameters.
-          */
-         int gl_drawid;
-
-         /**
-          * Stores if the current _mesa_prim is an indexed or non-indexed draw
-          * (~0/0). Useful to calculate gl_BaseVertex as an AND of firstvertex
-          * and is_indexed_draw.
-          */
-         int is_indexed_draw;
-      } derived_params;
-
-      /**
-       * Buffer and offset used for GL_ARB_shader_draw_parameters which contains
-       * parameters that are not present in the indirect buffer. They will go in
-       * their own vertex element.
-       */
-      struct brw_bo *derived_draw_params_bo;
-      uint32_t derived_draw_params_offset;
-
-      /**
-       * Pointer to the buffer storing the indirect draw parameters. It
-       * currently only stores the number of requested draw calls but more
-       * parameters could potentially be added.
-       */
-      struct brw_bo *draw_params_count_bo;
-      uint32_t draw_params_count_offset;
-
-      /**
-       * Draw indirect buffer.
-       */
-      unsigned draw_indirect_stride;
-      GLsizeiptr draw_indirect_offset;
-      struct gl_buffer_object *draw_indirect_data;
-   } draw;
-
-   struct {
-      /**
-       * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then it is
-       * an indirect call, and num_work_groups_offset is valid. Otherwise,
-       * num_work_groups is set based on glDispatchCompute.
-       */
-      struct brw_bo *num_work_groups_bo;
-      GLintptr num_work_groups_offset;
-      const GLuint *num_work_groups;
-      /**
-       * This is only used alongside ARB_compute_variable_group_size when the
-       * local work group size is variable, otherwise it's NULL.
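
(The (~0/0) encoding of is_indexed_draw in the derived_params block above exists so
gl_BaseVertex can be derived without a branch. A worked sketch of the arithmetic, not
code from this file:)

    /* indexed draw:     is_indexed_draw == ~0, the AND keeps firstvertex  */
    /* non-indexed draw: is_indexed_draw ==  0, gl_BaseVertex becomes 0    */
    int gl_basevertex = firstvertex & is_indexed_draw;
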
-       */
-      const GLuint *group_size;
-   } compute;
-
-   struct {
-      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
-      struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
-
-      struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
-      GLuint nr_enabled;
-      GLuint nr_buffers;
-
-      /* Summary of size and varying of active arrays, so we can check
-       * for changes to this state:
-       */
-      bool index_bounds_valid;
-      unsigned int min_index, max_index;
-
-      /* Offset from start of vertex buffer so we can avoid redefining
-       * the same VB packed over and over again.
-       */
-      unsigned int start_vertex_bias;
-
-      /**
-       * Certain vertex attribute formats aren't natively handled by the
-       * hardware and require special VS code to fix up their values.
-       *
-       * These bitfields indicate which workarounds are needed.
-       */
-      uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
-
-      /* High bits of the last seen vertex buffer address (for workarounds). */
-      uint16_t last_bo_high_bits[33];
-   } vb;
-
-   struct {
-      /**
-       * Index buffer for this draw_prims call.
-       *
-       * Updates are signaled by BRW_NEW_INDICES.
-       */
-      const struct _mesa_index_buffer *ib;
-
-      /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
-      struct brw_bo *bo;
-      uint32_t size;
-      unsigned index_size;
-
-      /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
-       * avoid re-uploading the IB packet over and over if we're actually
-       * referencing the same index buffer.
-       */
-      unsigned int start_vertex_offset;
-
-      /* High bits of the last seen index buffer address (for workarounds). */
-      uint16_t last_bo_high_bits;
-
-      /* Used to understand if the GPU state of primitive restart is up to date */
-      bool enable_cut_index;
-   } ib;
-
-   /* Active vertex program:
-    */
-   struct gl_program *programs[MESA_SHADER_STAGES];
-
-   /**
-    * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
-    * that we don't have to reemit that state every time we change FBOs.
-    */
-   unsigned int num_samples;
-
-   /* BRW_NEW_URB_ALLOCATIONS:
-    */
-   struct {
-      GLuint vsize;   /* vertex size plus header in urb registers */
-      GLuint gsize;   /* GS output size in urb registers */
-      GLuint hsize;   /* Tessellation control output size in urb registers */
-      GLuint dsize;   /* Tessellation evaluation output size in urb registers */
-      GLuint csize;   /* constant buffer size in urb registers */
-      GLuint sfsize;  /* setup data size in urb registers */
-
-      bool constrained;
-
-      GLuint nr_vs_entries;
-      GLuint nr_hs_entries;
-      GLuint nr_ds_entries;
-      GLuint nr_gs_entries;
-      GLuint nr_clip_entries;
-      GLuint nr_sf_entries;
-      GLuint nr_cs_entries;
-
-      GLuint vs_start;
-      GLuint hs_start;
-      GLuint ds_start;
-      GLuint gs_start;
-      GLuint clip_start;
-      GLuint sf_start;
-      GLuint cs_start;
-      /**
-       * URB size in the current configuration. The units this is expressed
-       * in are somewhat inconsistent, see intel_device_info::urb::size.
-       *
-       * FINISHME: Represent the URB size consistently in KB on all platforms.
-       */
-      GLuint size;
-
-      /* True if the most recently sent _3DSTATE_URB message allocated
-       * URB space for the GS.
-       */
-      bool gs_present;
-
-      /* True if the most recently sent _3DSTATE_URB message allocated
-       * URB space for the HS and DS.
-       */
-      bool tess_present;
-   } urb;
-
-
-   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-   struct {
-      GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
-      GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
-      GLuint clip_start;
-      GLuint clip_size;
-      GLuint vs_start;
-      GLuint vs_size;
-      GLuint total_size;
-
-      /**
-       * Pointer to the (intel_upload.c-generated) BO containing the uniforms
-       * for upload to the CURBE.
-       */
-      struct brw_bo *curbe_bo;
-      /** Offset within curbe_bo of space for current curbe entry */
-      GLuint curbe_offset;
-   } curbe;
-
-   /**
-    * Layout of vertex data exiting the geometry portion of the pipeline.
-    * This comes from the last enabled shader stage (GS, DS, or VS).
-    *
-    * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
-    */
-   struct brw_vue_map vue_map_geom_out;
-
-   struct {
-      struct brw_stage_state base;
-   } vs;
-
-   struct {
-      struct brw_stage_state base;
-   } tcs;
-
-   struct {
-      struct brw_stage_state base;
-   } tes;
-
-   struct {
-      struct brw_stage_state base;
-
-      /**
-       * True if the 3DSTATE_GS command most recently emitted to the 3D
-       * pipeline enabled the GS; false otherwise.
-       */
-      bool enabled;
-   } gs;
-
-   struct {
-      struct brw_ff_gs_prog_data *prog_data;
-
-      bool prog_active;
-      /** Offset in the program cache to the FF_GS program pre-gfx6 */
-      uint32_t prog_offset;
-      uint32_t state_offset;
-
-      uint32_t bind_bo_offset;
-      /**
-       * Surface offsets for the binding table. We only need surfaces to
-       * implement transform feedback so BRW_MAX_SOL_BINDINGS is all that we
-       * need in this case.
-       */
-      uint32_t surf_offset[BRW_MAX_SOL_BINDINGS];
-   } ff_gs;
-
-   struct {
-      struct brw_clip_prog_data *prog_data;
-
-      /** Offset in the program cache to the CLIP program pre-gfx6 */
-      uint32_t prog_offset;
-
-      /* Offset in the batch to the CLIP state on pre-gfx6. */
-      uint32_t state_offset;
-
-      /* As of gfx6, this is the offset in the batch to the CLIP VP,
-       * instead of vp_bo.
-       */
-      uint32_t vp_offset;
-
-      /**
-       * The number of viewports to use. If gl_ViewportIndex is written,
-       * we can have up to ctx->Const.MaxViewports viewports. If not,
-       * the viewport index is always 0, so we can only emit one.
-       */
-      uint8_t viewport_count;
-   } clip;
-
-
-   struct {
-      struct brw_sf_prog_data *prog_data;
-
-      /** Offset in the program cache to the SF program pre-gfx6 */
-      uint32_t prog_offset;
-      uint32_t state_offset;
-      uint32_t vp_offset;
-   } sf;
-
-   struct {
-      struct brw_stage_state base;
-
-      /**
-       * Buffer object used in place of multisampled null render targets on
-       * Gfx6. See brw_emit_null_surface_state().
-       */
-      struct brw_bo *multisampled_null_render_target_bo;
-
-      float offset_clamp;
-   } wm;
-
-   struct {
-      struct brw_stage_state base;
-   } cs;
-
-   struct {
-      uint32_t state_offset;
-      uint32_t blend_state_offset;
-      uint32_t depth_stencil_state_offset;
-      uint32_t vp_offset;
-   } cc;
-
-   struct {
-      struct brw_query_object *obj;
-      bool begin_emitted;
-   } query;
-
-   struct {
-      enum brw_predicate_state state;
-      bool supported;
-   } predicate;
-
-   struct intel_perf_context *perf_ctx;
-
-   int num_atoms[BRW_NUM_PIPELINES];
-   const struct brw_tracked_state render_atoms[76];
-   const struct brw_tracked_state compute_atoms[11];
-
-   const enum isl_format *mesa_to_isl_render_format;
-   const bool *mesa_format_supports_render;
-
-   /* PrimitiveRestart */
-   struct {
-      bool in_progress;
-      bool enable_cut_index;
-      unsigned restart_index;
-   } prim_restart;
-
-   /** Computed depth/stencil/hiz state from the current attached
-    * renderbuffers, valid only during the drawing state upload loop after
-    * brw_workaround_depthstencil_alignment().
-    */
-   struct {
-      /* Inter-tile (page-aligned) byte offsets. */
-      uint32_t depth_offset;
-      /* Intra-tile x,y offsets for drawing to combined depth-stencil. Only
-       * used for Gen < 6.
-       */
-      uint32_t tile_x, tile_y;
-   } depthstencil;
-
-   uint32_t num_instances;
-   int basevertex;
-   int baseinstance;
-
-   struct {
-      const struct intel_l3_config *config;
-   } l3;
-
-   struct {
-      struct brw_bo *bo;
-      const char **names;
-      int *ids;
-      enum shader_time_shader_type *types;
-      struct shader_times *cumulative;
-      int num_entries;
-      int max_entries;
-      double report_time;
-   } shader_time;
-
-   struct brw_fast_clear_state *fast_clear_state;
-
-   /* Array of aux usages to use for drawing. Aux usage for render targets is
-    * a bit more complex than simply calling a single function so we need some
-    * way of passing it from brw_draw.c to surface state setup.
-    */
-   enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
-
-   enum gfx9_astc5x5_wa_tex_type gfx9_astc5x5_wa_tex_mask;
-
-   /** Last rendering scale argument provided to brw_emit_hashing_mode().
*/ - unsigned current_hash_scale; - - __DRIcontext *driContext; - struct brw_screen *screen; - void *mem_ctx; -}; - -/* brw_clear.c */ -extern void brw_init_clear_functions(struct dd_function_table *functions); - -/*====================================================================== - * brw_context.c - */ -extern const char *const brw_vendor_string; - -extern const char * -brw_get_renderer_string(const struct brw_screen *screen); - -enum { - DRI_CONF_BO_REUSE_DISABLED, - DRI_CONF_BO_REUSE_ALL -}; - -void brw_update_renderbuffers(__DRIcontext *context, - __DRIdrawable *drawable); -void brw_prepare_render(struct brw_context *brw); - -void gfx9_apply_single_tex_astc5x5_wa(struct brw_context *brw, - mesa_format format, - enum isl_aux_usage aux_usage); - -void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, - bool *draw_aux_buffer_disabled); - -void brw_resolve_for_dri2_flush(struct brw_context *brw, - __DRIdrawable *drawable); - -GLboolean brw_create_context(gl_api api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - const struct __DriverContextConfig *ctx_config, - unsigned *error, - void *sharedContextPrivate); - -/*====================================================================== - * brw_misc_state.c - */ -void brw_workaround_depthstencil_alignment(struct brw_context *brw, - GLbitfield clear_mask); -void brw_emit_hashing_mode(struct brw_context *brw, unsigned width, - unsigned height, unsigned scale); - -/* brw_object_purgeable.c */ -void brw_init_object_purgeable_functions(struct dd_function_table *functions); - -/*====================================================================== - * brw_queryobj.c - */ -void brw_init_common_queryobj_functions(struct dd_function_table *functions); -void gfx4_init_queryobj_functions(struct dd_function_table *functions); -void brw_emit_query_begin(struct brw_context *brw); -void brw_emit_query_end(struct brw_context *brw); -void brw_query_counter(struct gl_context *ctx, struct gl_query_object *q); -bool brw_is_query_pipelined(struct brw_query_object *query); -uint64_t brw_raw_timestamp_delta(struct brw_context *brw, - uint64_t time0, uint64_t time1); - -/** gfx6_queryobj.c */ -void gfx6_init_queryobj_functions(struct dd_function_table *functions); -void brw_write_timestamp(struct brw_context *brw, struct brw_bo *bo, int idx); -void brw_write_depth_count(struct brw_context *brw, struct brw_bo *bo, int idx); - -/** hsw_queryobj.c */ -void hsw_overflow_result_to_gpr0(struct brw_context *brw, - struct brw_query_object *query, - int count); -void hsw_init_queryobj_functions(struct dd_function_table *functions); - -/** brw_conditional_render.c */ -void brw_init_conditional_render_functions(struct dd_function_table *functions); -bool brw_check_conditional_render(struct brw_context *brw); - -/** brw_batch.c */ -void brw_load_register_mem(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset); -void brw_load_register_mem64(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset); -void brw_store_register_mem32(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset); -void brw_store_register_mem64(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset); -void brw_load_register_imm32(struct brw_context *brw, - uint32_t reg, uint32_t imm); -void brw_load_register_imm64(struct brw_context *brw, - uint32_t reg, uint64_t imm); -void brw_load_register_reg(struct brw_context *brw, uint32_t dst, - uint32_t src); -void 
brw_load_register_reg64(struct brw_context *brw, uint32_t dst, - uint32_t src); -void brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo, - uint32_t offset, uint32_t imm); -void brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo, - uint32_t offset, uint64_t imm); - -/*====================================================================== - * intel_tex_validate.c - */ -void brw_validate_textures( struct brw_context *brw ); - - -/*====================================================================== - * brw_program.c - */ -void brw_init_frag_prog_functions(struct dd_function_table *functions); - -void brw_get_scratch_bo(struct brw_context *brw, - struct brw_bo **scratch_bo, int size); -void brw_alloc_stage_scratch(struct brw_context *brw, - struct brw_stage_state *stage_state, - unsigned per_thread_size); -void brw_init_shader_time(struct brw_context *brw); -int brw_get_shader_time_index(struct brw_context *brw, - struct gl_program *prog, - enum shader_time_shader_type type, - bool is_glsl_sh); -void brw_collect_and_report_shader_time(struct brw_context *brw); -void brw_destroy_shader_time(struct brw_context *brw); - -/* brw_urb.c - */ -void brw_calculate_urb_fence(struct brw_context *brw, unsigned csize, - unsigned vsize, unsigned sfsize); -void brw_upload_urb_fence(struct brw_context *brw); - -/* brw_curbe.c - */ -void brw_upload_cs_urb_state(struct brw_context *brw); - -/* brw_vs.c */ -gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); - -/* brw_draw_upload.c */ -unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_vertex_format *glformat); - -static inline unsigned -brw_get_index_type(unsigned index_size) -{ - /* The hw needs 0x00, 0x01, and 0x02 for ubyte, ushort, and uint, - * respectively. 
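- *
- * A quick sanity check of the shift below (illustrative values, not part
- * of the original comment): index_size is the index size in bytes, so
- * 1 >> 1 == 0x00 (ubyte), 2 >> 1 == 0x01 (ushort) and 4 >> 1 == 0x02
- * (uint).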
- */ - return index_size >> 1; -} - -void brw_prepare_vertices(struct brw_context *brw); - -/* brw_wm_surface_state.c */ -void brw_update_buffer_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset); -void -brw_update_sol_surface(struct brw_context *brw, - struct gl_buffer_object *buffer_obj, - uint32_t *out_offset, unsigned num_vector_components, - unsigned stride_dwords, unsigned offset_dwords); -void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data); -void brw_upload_image_surfaces(struct brw_context *brw, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data); - -/* brw_surface_formats.c */ -void brw_screen_init_surface_formats(struct brw_screen *screen); -void brw_init_surface_formats(struct brw_context *brw); -bool brw_render_target_supported(struct brw_context *brw, - struct gl_renderbuffer *rb); -uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); - -/* brw_performance_query.c */ -void brw_init_performance_queries(struct brw_context *brw); - -/* intel_extensions.c */ -extern void brw_init_extensions(struct gl_context *ctx); - -/* intel_state.c */ -extern int brw_translate_shadow_compare_func(GLenum func); -extern int brw_translate_compare_func(GLenum func); -extern int brw_translate_stencil_op(GLenum op); - -/* brw_sync.c */ -void brw_init_syncobj_functions(struct dd_function_table *functions); - -/* gfx6_sol.c */ -struct gl_transform_feedback_object * -brw_new_transform_feedback(struct gl_context *ctx, GLuint name); -void -brw_delete_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj); -void -brw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_save_primitives_written_counters(struct brw_context *brw, - struct brw_transform_feedback_object *obj); -GLsizei -brw_get_transform_feedback_vertex_count(struct gl_context *ctx, - struct gl_transform_feedback_object *obj, - GLuint stream); - -/* gfx7_sol_state.c */ -void -gfx7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj); -void -gfx7_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -gfx7_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -gfx7_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); - -/* hsw_sol.c */ -void -hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj); -void -hsw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -hsw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -hsw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); - -/* brw_blorp_blit.cpp */ -GLbitfield -brw_blorp_framebuffer(struct brw_context *brw, - struct gl_framebuffer *readFb, - struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, GLint 
srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter);
-
-bool
-brw_blorp_copytexsubimage(struct brw_context *brw,
- struct gl_renderbuffer *src_rb,
- struct gl_texture_image *dst_image,
- int slice,
- int srcX0, int srcY0,
- int dstX0, int dstY0,
- int width, int height);
-
-/* brw_generate_mipmap.c */
-void brw_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *tex_obj);
-
-void
-gfx6_get_sample_position(struct gl_context *ctx,
- struct gl_framebuffer *fb,
- GLuint index,
- GLfloat *result);
-
-/* gfx8_multisample_state.c */
-void gfx8_emit_3dstate_sample_pattern(struct brw_context *brw);
-
-/* gfx7_l3_state.c */
-void brw_emit_l3_state(struct brw_context *brw);
-
-/* gfx7_urb.c */
-void
-gfx7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
- unsigned hs_size, unsigned ds_size,
- unsigned gs_size, unsigned fs_size);
-
-void
-gfx6_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, unsigned gs_size);
-void
-gfx7_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, bool tess_present);
-
-/* brw_reset.c */
-extern GLenum
-brw_get_graphics_reset_status(struct gl_context *ctx);
-void
-brw_check_for_reset(struct brw_context *brw);
-
-/* brw_compute.c */
-extern void
-brw_init_compute_functions(struct dd_function_table *functions);
-
-/* brw_program_binary.c */
-extern void
-brw_program_binary_init(unsigned device_id);
-extern void
-brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1);
-void brw_serialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog);
-extern void
-brw_deserialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *shProg,
- struct gl_program *prog);
-void
-brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog);
-void
-brw_program_deserialize_driver_blob(struct gl_context *ctx,
- struct gl_program *prog,
- gl_shader_stage stage);
-
-/*======================================================================
- * Inline conversion functions. These are better-typed than the
- * macros used previously:
- */
-static inline struct brw_context *
-brw_context( struct gl_context *ctx )
-{
- return (struct brw_context *)ctx;
-}
-
-static inline struct brw_program *
-brw_program(struct gl_program *p)
-{
- return (struct brw_program *) p;
-}
-
-static inline const struct brw_program *
-brw_program_const(const struct gl_program *p)
-{
- return (const struct brw_program *) p;
-}
-
-static inline bool
-brw_depth_writes_enabled(const struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* We consider depth writes disabled if the depth function is GL_EQUAL,
- * because it would just overwrite the existing depth value with itself.
- *
- * These bonus depth writes not only use bandwidth, but they also can
- * prevent early depth processing. For example, if the pixel shader
- * discards, the hardware must invoke the shader to determine whether or
- * not to do the depth write. If writes are disabled, we may still be able
- * to do the depth test before the shader, and skip the shader execution.
- *
- * The Broadwell 3DSTATE_WM_DEPTH_STENCIL documentation also contains
- * a programming note saying to disable depth writes for EQUAL. 
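- *
- * (Illustrative note, not from the original comment: with
- * glDepthFunc(GL_EQUAL) and glDepthMask(GL_TRUE) this helper still
- * reports writes as disabled, since any write could only store the value
- * already present in the depth buffer.)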
- */
- return ctx->Depth.Test && ctx->Depth.Mask && ctx->Depth.Func != GL_EQUAL;
-}
-
-void
-brw_emit_depthbuffer(struct brw_context *brw);
-
-uint32_t get_hw_prim_for_gl_prim(int mode);
-
-void
-gfx6_upload_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state);
-
-bool
-gfx9_use_linear_1d_layout(const struct brw_context *brw,
- const struct brw_mipmap_tree *mt);
-
-/* brw_queryformat.c */
-void brw_query_internal_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, GLenum pname,
- GLint *params);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/brw_copy_image.c b/src/mesa/drivers/dri/i965/brw_copy_image.c
deleted file mode 100644
index 5b9f49a..0000000
--- a/src/mesa/drivers/dri/i965/brw_copy_image.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2014 Intel Corporation All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jason Ekstrand
- */
-
-#include "brw_blorp.h"
-#include "brw_fbo.h"
-#include "brw_tex.h"
-#include "brw_blit.h"
-#include "brw_mipmap_tree.h"
-#include "main/formats.h"
-#include "main/teximage.h"
-#include "drivers/common/meta.h"
-
-static void
-copy_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_x, int src_y, int src_z, unsigned src_level,
- struct brw_mipmap_tree *dst_mt,
- int dst_x, int dst_y, int dst_z, unsigned dst_level,
- int src_width, int src_height)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver <= 5) {
- /* On gfx4-5, try BLT first.
- *
- * Gfx4-5 have a single ring for both 3D and BLT operations, so there are
- * no inter-ring synchronization issues like on Gfx6+. It is apparently
- * faster than using the 3D pipeline. Original Gfx4 also has to rebase
- * and copy miptree slices in order to render to unaligned locations. 
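- *
- * (If brw_miptree_copy() below declines the blit and returns false, we
- * simply fall through to the BLORP path.)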
- */ - if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y, - dst_mt, dst_level, dst_z, dst_x, dst_y, - src_width, src_height)) - return; - } - - brw_blorp_copy_miptrees(brw, - src_mt, src_level, src_z, - dst_mt, dst_level, dst_z, - src_x, src_y, dst_x, dst_y, - src_width, src_height); -} - -static void -brw_copy_image_sub_data(struct gl_context *ctx, - struct gl_texture_image *src_image, - struct gl_renderbuffer *src_renderbuffer, - int src_x, int src_y, int src_z, - struct gl_texture_image *dst_image, - struct gl_renderbuffer *dst_renderbuffer, - int dst_x, int dst_y, int dst_z, - int src_width, int src_height) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_mipmap_tree *src_mt, *dst_mt; - unsigned src_level, dst_level; - - if (src_image) { - src_mt = brw_texture_image(src_image)->mt; - src_level = src_image->Level + src_image->TexObject->Attrib.MinLevel; - - /* Cube maps actually have different images per face */ - if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - src_z = src_image->Face; - - src_z += src_image->TexObject->Attrib.MinLayer; - } else { - assert(src_renderbuffer); - src_mt = brw_renderbuffer(src_renderbuffer)->mt; - src_image = src_renderbuffer->TexImage; - src_level = 0; - } - - if (dst_image) { - dst_mt = brw_texture_image(dst_image)->mt; - - dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel; - - /* Cube maps actually have different images per face */ - if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - dst_z = dst_image->Face; - - dst_z += dst_image->TexObject->Attrib.MinLayer; - } else { - assert(dst_renderbuffer); - dst_mt = brw_renderbuffer(dst_renderbuffer)->mt; - dst_image = dst_renderbuffer->TexImage; - dst_level = 0; - } - - copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level, - dst_mt, dst_x, dst_y, dst_z, dst_level, - src_width, src_height); - - /* CopyImage only works for equal formats, texture view equivalence - * classes, and a couple special cases for compressed textures. - * - * Notably, GL_DEPTH_STENCIL does not appear in any equivalence - * classes, so we know the formats must be the same, and thus both - * will either have stencil, or not. They can't be mismatched. - */ - assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL)); - - if (dst_mt->stencil_mt) { - copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level, - dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level, - src_width, src_height); - } -} - -void -brw_init_copy_image_functions(struct dd_function_table *functions) -{ - functions->CopyImageSubData = brw_copy_image_sub_data; -} diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c deleted file mode 100644 index 786dda4..0000000 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2014 - 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "util/ralloc.h" -#include "brw_context.h" -#include "brw_cs.h" -#include "brw_wm.h" -#include "brw_mipmap_tree.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" -#include "compiler/glsl/ir_uniform.h" - -static void -assign_cs_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - struct brw_cs_prog_data *prog_data) -{ - uint32_t next_binding_table_offset = 0; - - /* May not be used if the gl_NumWorkGroups variable is not accessed. */ - prog_data->binding_table.work_groups_start = next_binding_table_offset; - next_binding_table_offset++; - - brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base, - next_binding_table_offset); -} - -static bool -brw_codegen_cs_prog(struct brw_context *brw, - struct brw_program *cp, - struct brw_cs_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const GLuint *program; - void *mem_ctx = ralloc_context(NULL); - struct brw_cs_prog_data prog_data; - bool start_busy = false; - double start_time = 0; - nir_shader *nir = nir_shader_clone(mem_ctx, cp->program.nir); - - memset(&prog_data, 0, sizeof(prog_data)); - - if (cp->program.info.shared_size > 64 * 1024) { - cp->program.sh.data->LinkStatus = LINKING_FAILURE; - const char *error_str = - "Compute shader used more than 64KB of shared variables"; - ralloc_strcat(&cp->program.sh.data->InfoLog, error_str); - _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data); - - brw_nir_setup_glsl_uniforms(mem_ctx, nir, - &cp->program, &prog_data.base, true); - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - brw_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - - brw_nir_lower_cs_intrinsics(nir); - - struct brw_compile_cs_params params = { - .nir = nir, - .key = key, - .prog_data = &prog_data, - .log_data = brw, - }; - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - params.shader_time = true; - params.shader_time_index = - brw_get_shader_time_index(brw, &cp->program, ST_CS, true); - } - - program = brw_compile_cs(brw->screen->compiler, mem_ctx, ¶ms); - if (program == NULL) { - cp->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&cp->program.sh.data->InfoLog, params.error_str); - _mesa_problem(NULL, "Failed to compile compute shader: %s\n", params.error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (cp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_COMPUTE, cp->program.Id, - &key->base); - } - cp->compiled_once = true; - - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("CS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. 
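- * Stealing them to the NULL ralloc context below detaches them from
- * mem_ctx, so the ralloc_free(mem_ctx) further down cannot free them out
- * from under the cache.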
*/ - ralloc_steal(NULL, prog_data.base.param); - ralloc_steal(NULL, prog_data.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG, - key, sizeof(*key), - program, prog_data.base.program_size, - &prog_data, sizeof(prog_data), - &brw->cs.base.prog_offset, &brw->cs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - - -void -brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_COMPUTE_PROGRAM */ - const struct brw_program *cp = - (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - - memset(key, 0, sizeof(*key)); - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(ctx, cp, &key->base); -} - - -void -brw_upload_cs_prog(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_cs_prog_key key; - struct brw_program *cp = - (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - - if (!cp) - return; - - if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM)) - return; - - brw->cs.base.sampler_count = - util_last_bit(ctx->ComputeProgram._Current->SamplersUsed); - - brw_cs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, &key, sizeof(key), - &brw->cs.base.prog_offset, &brw->cs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_COMPUTE)) - return; - - cp = (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - cp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_cs_prog(brw, cp, &key); - assert(success); -} - -void -brw_cs_populate_default_key(const struct brw_compiler *compiler, - struct brw_cs_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - memset(key, 0, sizeof(*key)); - brw_populate_default_base_prog_key(devinfo, brw_program(prog), &key->base); -} - -bool -brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_cs_prog_key key; - - struct brw_program *bcp = brw_program(prog); - - brw_cs_populate_default_key(brw->screen->compiler, &key, prog); - - uint32_t old_prog_offset = brw->cs.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data; - - bool success = brw_codegen_cs_prog(brw, bcp, &key); - - brw->cs.base.prog_offset = old_prog_offset; - brw->cs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h deleted file mode 100644 index a0d43ab..0000000 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -#ifndef BRW_CS_H -#define BRW_CS_H - -#ifdef __cplusplus -extern "C" { -#endif - -void -brw_upload_cs_prog(struct brw_context *brw); - -void -brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key); -void -brw_cs_populate_default_key(const struct brw_compiler *compiler, - struct brw_cs_prog_key *key, - struct gl_program *prog); - -#ifdef __cplusplus -} -#endif - -#endif /* BRW_CS_H */ diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c deleted file mode 100644 index 87e99ef..0000000 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -/** @file brw_curbe.c - * - * Push constant handling for gfx4/5. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. On - * gfx4 and gfx5, we create a blob in memory containing all the push constants - * for all the stages in order. At CMD_CONST_BUFFER time that blob is loaded - * into URB space as a constant URB entry (CURBE) so that it can be accessed - * quickly at thread setup time. Each individual fixed function unit's state - * (brw_vs_state.c for example) tells the hardware which subset of the CURBE - * it wants in its register space, and we calculate those areas here under the - * BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag. The brw_urb.c allocation will control - * how many CURBEs can be loaded into the hardware at once before a pipeline - * stall occurs at CMD_CONST_BUFFER time. - * - * On gfx6+, constant handling becomes a much simpler set of per-unit state. - * See gfx6_upload_vec4_push_constants() in gfx6_vs_state.c for that code. 
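- *
- * As an illustrative sketch (register counts invented for the example):
- * with 8 wm registers, 2 clip registers and 4 vs registers,
- * calculate_curbe_offsets() below lays the blob out as wm at [0, 8),
- * clip at [8, 10) and vs at [10, 14), for a total_size of 14 of the 32
- * available 512-bit URB units.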
- */
-
-
-#include "compiler/nir/nir.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_statevars.h"
-#include "util/bitscan.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_util.h"
-#include "util/u_math.h"
-
-
-/**
- * Partition the CURBE between the various users of constant values.
- *
- * If the users all fit within the previous allocation, we avoid changing
- * the layout because that means reuploading all unit state and uploading new
- * constant buffers.
- */
-static void calculate_curbe_offsets( struct brw_context *brw )
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FS_PROG_DATA */
- const GLuint nr_fp_regs = (brw->wm.base.prog_data->nr_params + 15) / 16;
-
- /* BRW_NEW_VS_PROG_DATA */
- const GLuint nr_vp_regs = (brw->vs.base.prog_data->nr_params + 15) / 16;
- GLuint nr_clip_regs = 0;
- GLuint total_regs;
-
- /* _NEW_TRANSFORM */
- if (ctx->Transform.ClipPlanesEnabled) {
- GLuint nr_planes = 6 + util_bitcount(ctx->Transform.ClipPlanesEnabled);
- nr_clip_regs = (nr_planes * 4 + 15) / 16;
- }
-
-
- total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
-
- /* The CURBE allocation size is limited to 32 512-bit units (128 EU
- * registers, or 1024 floats). See CS_URB_STATE in the gfx4 or gfx5
- * (volume 1, part 1) PRMs.
- *
- * Note that in brw_fs.cpp we're only loading up to 16 EU registers of
- * values as push constants before spilling to pull constants, and in
- * brw_vec4.cpp we're loading up to 32 registers of push constants. An EU
- * register is 1/2 of one of these URB entry units, so that leaves us 16 EU
- * regs for clip.
- */
- assert(total_regs <= 32);
-
- /* Lazy resize:
- */
- if (nr_fp_regs > brw->curbe.wm_size ||
- nr_vp_regs > brw->curbe.vs_size ||
- nr_clip_regs != brw->curbe.clip_size ||
- (total_regs < brw->curbe.total_size / 4 &&
- brw->curbe.total_size > 16)) {
-
- GLuint reg = 0;
-
- /* Calculate a new layout:
- */
- reg = 0;
- brw->curbe.wm_start = reg;
- brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
- brw->curbe.clip_start = reg;
- brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
- brw->curbe.vs_start = reg;
- brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
- brw->curbe.total_size = reg;
-
- if (0)
- fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
- brw->curbe.wm_start,
- brw->curbe.wm_size,
- brw->curbe.clip_start,
- brw->curbe.clip_size,
- brw->curbe.vs_start,
- brw->curbe.vs_size );
-
- brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
- }
-}
-
-
-const struct brw_tracked_state brw_curbe_offsets = {
- .dirty = {
- .mesa = _NEW_TRANSFORM,
- .brw = BRW_NEW_CONTEXT |
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = calculate_curbe_offsets
-};
-
-
-
-
-/** Uploads the CS_URB_STATE packet.
- *
- * Just like brw_vs_state.c and brw_wm_state.c define a URB entry size and
- * number of entries for their stages, constant buffers do so using this state
- * packet. Having multiple CURBEs in the URB at the same time allows the
- * hardware to avoid a pipeline stall between primitives using different
- * constant buffer contents. 
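- *
- * (Concretely, the second dword emitted below packs the entry size minus
- * one at bit 4 and the number of entries in the bits below it.)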
- */
-void brw_upload_cs_urb_state(struct brw_context *brw)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
-
- /* BRW_NEW_URB_FENCE */
- if (brw->urb.csize == 0) {
- OUT_BATCH(0);
- } else {
- /* BRW_NEW_URB_FENCE */
- assert(brw->urb.nr_cs_entries);
- OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
- }
- ADVANCE_BATCH();
-}
-
-static const GLfloat fixed_plane[6][4] = {
- { 0, 0, -1, 1 },
- { 0, 0, 1, 1 },
- { 0, -1, 0, 1 },
- { 0, 1, 0, 1 },
- {-1, 0, 0, 1 },
- { 1, 0, 0, 1 }
-};
-
-/**
- * Gathers together all the uniform values into a block of memory to be
- * uploaded into the CURBE, then emits the state packet telling the hardware
- * the new location.
- */
-static void
-brw_upload_constant_buffer(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- const GLuint sz = brw->curbe.total_size;
- const GLuint bufsz = sz * 16 * sizeof(GLfloat);
- gl_constant_value *buf;
- GLuint i;
- gl_clip_plane *clip_planes;
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
-
- /* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
-
- if (sz == 0) {
- goto emit;
- }
-
- buf = brw_upload_space(&brw->upload, bufsz, 64,
- &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- /* fragment shader constants */
- if (brw->curbe.wm_size) {
- _mesa_load_state_parameters(ctx, fp->Parameters);
-
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- GLuint offset = brw->curbe.wm_start * 16;
-
- /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
- brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
- brw->wm.base.prog_data->param,
- brw->wm.base.prog_data->nr_params);
- }
-
- /* clipper constants */
- if (brw->curbe.clip_size) {
- GLuint offset = brw->curbe.clip_start * 16;
- GLbitfield mask;
-
- /* If any planes are going this way, send them all this way:
- */
- for (i = 0; i < 6; i++) {
- buf[offset + i * 4 + 0].f = fixed_plane[i][0];
- buf[offset + i * 4 + 1].f = fixed_plane[i][1];
- buf[offset + i * 4 + 2].f = fixed_plane[i][2];
- buf[offset + i * 4 + 3].f = fixed_plane[i][3];
- }
-
- /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
- * clip-space:
- */
- clip_planes = brw_select_clip_planes(ctx);
- mask = ctx->Transform.ClipPlanesEnabled;
- while (mask) {
- const int j = u_bit_scan(&mask);
- buf[offset + i * 4 + 0].f = clip_planes[j][0];
- buf[offset + i * 4 + 1].f = clip_planes[j][1];
- buf[offset + i * 4 + 2].f = clip_planes[j][2];
- buf[offset + i * 4 + 3].f = clip_planes[j][3];
- i++;
- }
- }
-
- /* vertex shader constants */
- if (brw->curbe.vs_size) {
- _mesa_load_state_parameters(ctx, vp->Parameters);
-
- GLuint offset = brw->curbe.vs_start * 16;
-
- /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
- brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
- brw->vs.base.prog_data->param,
- brw->vs.base.prog_data->nr_params);
- }
-
- if (0) {
- for (i = 0; i < sz*16; i+=4)
- fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
- buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
- }
-
- /* Because this provokes an action (i.e. copy the constants into the
- * URB), it shouldn't be short-circuited if identical to the
- * previous time - because e.g. the urb destination may have
- * changed, or the urb contents may differ from last time. 
- *
- * Note that the data referred to is actually copied internally,
- * not just used in place according to the passed pointer.
- *
- * It appears that the CS unit takes care of using each available
- * URB entry (Const URB Entry == CURBE) in turn, and issuing
- * flushes as necessary when double-buffering of CURBEs isn't
- * possible.
- */
-
-emit:
- /* BRW_NEW_URB_FENCE: From the gfx4 PRM, volume 1, section 3.9.8
- * (CONSTANT_BUFFER (CURBE Load)):
- *
- * "Modifying the CS URB allocation via URB_FENCE invalidates any
- * previous CURBE entries. Therefore software must subsequently
- * [re]issue a CONSTANT_BUFFER command before CURBE data can be used
- * in the pipeline."
- */
- BEGIN_BATCH(2);
- if (brw->curbe.total_size == 0) {
- OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
- OUT_BATCH(0);
- } else {
- OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
- OUT_RELOC(brw->curbe.curbe_bo, 0,
- (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
- }
- ADVANCE_BATCH();
-
- /* Work around a Broadwater/Crestline depth interpolator bug. The
- * following sequence will cause GPU hangs:
- *
- * 1. Change state so that all depth related fields in CC_STATE are
- * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
- * 2. Emit a CONSTANT_BUFFER packet.
- * 3. Draw via 3DPRIMITIVE.
- *
- * The recommended workaround is to emit a non-pipelined state change after
- * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
- *
- * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP (as it's small),
- * and always emit it when "PS Use Source Depth" is set. We could be more
- * precise, but the additional complexity is probably not worth it.
- *
- * BRW_NEW_FRAGMENT_PROGRAM
- */
- if (devinfo->verx10 == 40 &&
- BITSET_TEST(fp->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-}
-
-const struct brw_tracked_state brw_constant_buffer = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
- BRW_NEW_URB_FENCE |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_constant_buffer,
-};
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
deleted file mode 100644
index a548419..0000000
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ /dev/null
@@ -1,1668 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#ifndef BRW_DEFINES_H -#define BRW_DEFINES_H - -#include "util/macros.h" - -#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) -/* Using the GNU statement expression extension */ -#define SET_FIELD(value, field) \ - ({ \ - uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ - assert((fieldval & ~ field ## _MASK) == 0); \ - fieldval & field ## _MASK; \ - }) - -#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) -#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) - -/** - * For use with masked MMIO registers where the upper 16 bits control which - * of the lower bits are committed to the register. - */ -#define REG_MASK(value) ((value) << 16) - -/* 3D state: - */ -#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ -/* DW0 */ -# define GFX4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 -# define GFX4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) -# define GFX4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) -# define GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10) -# define GFX7_3DPRIM_PREDICATE_ENABLE (1 << 8) -/* DW1 */ -# define GFX7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) -# define GFX7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) - -#define BRW_ANISORATIO_2 0 -#define BRW_ANISORATIO_4 1 -#define BRW_ANISORATIO_6 2 -#define BRW_ANISORATIO_8 3 -#define BRW_ANISORATIO_10 4 -#define BRW_ANISORATIO_12 5 -#define BRW_ANISORATIO_14 6 -#define BRW_ANISORATIO_16 7 - -#define BRW_BLENDFACTOR_ONE 0x1 -#define BRW_BLENDFACTOR_SRC_COLOR 0x2 -#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 -#define BRW_BLENDFACTOR_DST_ALPHA 0x4 -#define BRW_BLENDFACTOR_DST_COLOR 0x5 -#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 -#define BRW_BLENDFACTOR_CONST_COLOR 0x7 -#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 -#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 -#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A -#define BRW_BLENDFACTOR_ZERO 0x11 -#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 -#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 -#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 -#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 -#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 -#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 -#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 -#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A - -#define BRW_BLENDFUNCTION_ADD 0 -#define BRW_BLENDFUNCTION_SUBTRACT 1 -#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BRW_BLENDFUNCTION_MIN 3 -#define BRW_BLENDFUNCTION_MAX 4 - -#define BRW_ALPHATEST_FORMAT_UNORM8 0 -#define BRW_ALPHATEST_FORMAT_FLOAT32 1 - -#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 -#define BRW_CHROMAKEY_REPLACE_BLACK 1 - -#define BRW_CLIP_API_OGL 0 -#define BRW_CLIP_API_DX 1 - -#define BRW_CLIP_NDCSPACE 0 -#define BRW_CLIP_SCREENSPACE 1 - -#define BRW_COMPAREFUNCTION_ALWAYS 0 -#define BRW_COMPAREFUNCTION_NEVER 1 -#define BRW_COMPAREFUNCTION_LESS 2 -#define BRW_COMPAREFUNCTION_EQUAL 3 -#define BRW_COMPAREFUNCTION_LEQUAL 4 -#define BRW_COMPAREFUNCTION_GREATER 5 -#define BRW_COMPAREFUNCTION_NOTEQUAL 6 -#define BRW_COMPAREFUNCTION_GEQUAL 7 - -#define BRW_COVERAGE_PIXELS_HALF 0 -#define BRW_COVERAGE_PIXELS_1 1 -#define BRW_COVERAGE_PIXELS_2 2 -#define BRW_COVERAGE_PIXELS_4 3 - 
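-/* Worked example for the field helpers above (the values are purely
- * illustrative): INTEL_MASK(26, 18) expands to the nine-bit mask
- * 0x07fc0000, so SET_FIELD(fmt, BRW_SURFACE_FORMAT) shifts fmt up to bit
- * 18 and asserts that it fits, while GET_FIELD(dw0, BRW_SURFACE_FORMAT)
- * recovers it again.
- */
-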
-#define BRW_CULLMODE_BOTH 0 -#define BRW_CULLMODE_NONE 1 -#define BRW_CULLMODE_FRONT 2 -#define BRW_CULLMODE_BACK 3 - -#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 -#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 - -#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 -#define BRW_DEPTHFORMAT_D32_FLOAT 1 -#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 -#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GFX5 */ -#define BRW_DEPTHFORMAT_D16_UNORM 5 - -#define BRW_FLOATING_POINT_IEEE_754 0 -#define BRW_FLOATING_POINT_NON_IEEE_754 1 - -#define BRW_FRONTWINDING_CW 0 -#define BRW_FRONTWINDING_CCW 1 - -#define BRW_CUT_INDEX_ENABLE (1 << 10) - -#define BRW_INDEX_BYTE 0 -#define BRW_INDEX_WORD 1 -#define BRW_INDEX_DWORD 2 - -#define BRW_LOGICOPFUNCTION_CLEAR 0 -#define BRW_LOGICOPFUNCTION_NOR 1 -#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 -#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 -#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 -#define BRW_LOGICOPFUNCTION_INVERT 5 -#define BRW_LOGICOPFUNCTION_XOR 6 -#define BRW_LOGICOPFUNCTION_NAND 7 -#define BRW_LOGICOPFUNCTION_AND 8 -#define BRW_LOGICOPFUNCTION_EQUIV 9 -#define BRW_LOGICOPFUNCTION_NOOP 10 -#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 -#define BRW_LOGICOPFUNCTION_COPY 12 -#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 -#define BRW_LOGICOPFUNCTION_OR 14 -#define BRW_LOGICOPFUNCTION_SET 15 - -#define BRW_MAPFILTER_NEAREST 0x0 -#define BRW_MAPFILTER_LINEAR 0x1 -#define BRW_MAPFILTER_ANISOTROPIC 0x2 - -#define BRW_MIPFILTER_NONE 0 -#define BRW_MIPFILTER_NEAREST 1 -#define BRW_MIPFILTER_LINEAR 3 - -#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 -#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 -#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 -#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 -#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 -#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 - -#define BRW_PREFILTER_ALWAYS 0x0 -#define BRW_PREFILTER_NEVER 0x1 -#define BRW_PREFILTER_LESS 0x2 -#define BRW_PREFILTER_EQUAL 0x3 -#define BRW_PREFILTER_LEQUAL 0x4 -#define BRW_PREFILTER_GREATER 0x5 -#define BRW_PREFILTER_NOTEQUAL 0x6 -#define BRW_PREFILTER_GEQUAL 0x7 - -#define BRW_PROVOKING_VERTEX_0 0 -#define BRW_PROVOKING_VERTEX_1 1 -#define BRW_PROVOKING_VERTEX_2 2 - -#define BRW_RASTRULE_UPPER_LEFT 0 -#define BRW_RASTRULE_UPPER_RIGHT 1 -/* These are listed as "Reserved, but not seen as useful" - * in Intel documentation (page 212, "Point Rasterization Rule", - * section 7.4 "SF Pipeline State Summary", of document - * "Intel® 965 Express Chipset Family and Intel® G35 Express - * Chipset Graphics Controller Programmer's Reference Manual, - * Volume 2: 3D/Media", Revision 1.0b as of January 2008, - * available at - * https://01.org/linuxgraphics/documentation/hardware-specification-prms - * at the time of this writing). - * - * These appear to be supported on at least some - * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT - * is useful when using OpenGL to render to a FBO - * (which has the pixel coordinate Y orientation inverted - * with respect to the normal OpenGL pixel coordinate system). 
- */ -#define BRW_RASTRULE_LOWER_LEFT 2 -#define BRW_RASTRULE_LOWER_RIGHT 3 - -#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 -#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 -#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 - -#define BRW_STENCILOP_KEEP 0 -#define BRW_STENCILOP_ZERO 1 -#define BRW_STENCILOP_REPLACE 2 -#define BRW_STENCILOP_INCRSAT 3 -#define BRW_STENCILOP_DECRSAT 4 -#define BRW_STENCILOP_INCR 5 -#define BRW_STENCILOP_DECR 6 -#define BRW_STENCILOP_INVERT 7 - -/* Surface state DW0 */ -#define GFX8_SURFACE_IS_ARRAY (1 << 28) -#define GFX8_SURFACE_VALIGN_4 (1 << 16) -#define GFX8_SURFACE_VALIGN_8 (2 << 16) -#define GFX8_SURFACE_VALIGN_16 (3 << 16) -#define GFX8_SURFACE_HALIGN_4 (1 << 14) -#define GFX8_SURFACE_HALIGN_8 (2 << 14) -#define GFX8_SURFACE_HALIGN_16 (3 << 14) -#define GFX8_SURFACE_TILING_NONE (0 << 12) -#define GFX8_SURFACE_TILING_W (1 << 12) -#define GFX8_SURFACE_TILING_X (2 << 12) -#define GFX8_SURFACE_TILING_Y (3 << 12) -#define GFX8_SURFACE_SAMPLER_L2_BYPASS_DISABLE (1 << 9) -#define BRW_SURFACE_RC_READ_WRITE (1 << 8) -#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 -#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 -#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 -#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f -#define BRW_SURFACE_BLEND_ENABLED (1 << 13) -#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 -#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 -#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 -#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 - -#define GFX9_SURFACE_ASTC_HDR_FORMAT_BIT 0x100 - -#define BRW_SURFACE_FORMAT_SHIFT 18 -#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) - -#define BRW_SURFACERETURNFORMAT_FLOAT32 0 -#define BRW_SURFACERETURNFORMAT_S1 1 - -#define BRW_SURFACE_TYPE_SHIFT 29 -#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) -#define BRW_SURFACE_1D 0 -#define BRW_SURFACE_2D 1 -#define BRW_SURFACE_3D 2 -#define BRW_SURFACE_CUBE 3 -#define BRW_SURFACE_BUFFER 4 -#define BRW_SURFACE_NULL 7 - -#define GFX7_SURFACE_IS_ARRAY (1 << 28) -#define GFX7_SURFACE_VALIGN_2 (0 << 16) -#define GFX7_SURFACE_VALIGN_4 (1 << 16) -#define GFX7_SURFACE_HALIGN_4 (0 << 15) -#define GFX7_SURFACE_HALIGN_8 (1 << 15) -#define GFX7_SURFACE_TILING_NONE (0 << 13) -#define GFX7_SURFACE_TILING_X (2 << 13) -#define GFX7_SURFACE_TILING_Y (3 << 13) -#define GFX7_SURFACE_ARYSPC_FULL (0 << 10) -#define GFX7_SURFACE_ARYSPC_LOD0 (1 << 10) - -/* Surface state DW2 */ -#define BRW_SURFACE_HEIGHT_SHIFT 19 -#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) -#define BRW_SURFACE_WIDTH_SHIFT 6 -#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) -#define BRW_SURFACE_LOD_SHIFT 2 -#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) -#define GFX7_SURFACE_HEIGHT_SHIFT 16 -#define GFX7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16) -#define GFX7_SURFACE_WIDTH_SHIFT 0 -#define GFX7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0) - -/* Surface state DW3 */ -#define BRW_SURFACE_DEPTH_SHIFT 21 -#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) -#define BRW_SURFACE_PITCH_SHIFT 3 -#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) -#define BRW_SURFACE_TILED (1 << 1) -#define BRW_SURFACE_TILED_Y (1 << 0) -#define HSW_SURFACE_IS_INTEGER_FORMAT (1 << 18) - -/* Surface state DW4 */ -#define BRW_SURFACE_MIN_LOD_SHIFT 28 -#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) -#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 17 -#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(27, 17) -#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 8 -#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(16, 8) -#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) -#define 
BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) -#define GFX7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3) -#define GFX8_SURFACE_MULTISAMPLECOUNT_2 (1 << 3) -#define GFX7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3) -#define GFX7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) -#define GFX8_SURFACE_MULTISAMPLECOUNT_16 (4 << 3) -#define GFX7_SURFACE_MSFMT_MSS (0 << 6) -#define GFX7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6) -#define GFX7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18 -#define GFX7_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(28, 18) -#define GFX7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 7 -#define GFX7_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(17, 7) - -/* Surface state DW5 */ -#define BRW_SURFACE_X_OFFSET_SHIFT 25 -#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) -#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) -#define BRW_SURFACE_Y_OFFSET_SHIFT 20 -#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) -#define GFX7_SURFACE_MIN_LOD_SHIFT 4 -#define GFX7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4) -#define GFX8_SURFACE_Y_OFFSET_SHIFT 21 -#define GFX8_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 21) - -#define GFX9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8 -#define GFX9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8) - -/* Surface state DW6 */ -#define GFX7_SURFACE_MCS_ENABLE (1 << 0) -#define GFX7_SURFACE_MCS_PITCH_SHIFT 3 -#define GFX7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) -#define GFX8_SURFACE_AUX_QPITCH_SHIFT 16 -#define GFX8_SURFACE_AUX_QPITCH_MASK INTEL_MASK(30, 16) -#define GFX8_SURFACE_AUX_PITCH_SHIFT 3 -#define GFX8_SURFACE_AUX_PITCH_MASK INTEL_MASK(11, 3) -#define GFX8_SURFACE_AUX_MODE_MASK INTEL_MASK(2, 0) - -#define GFX8_SURFACE_AUX_MODE_NONE 0 -#define GFX8_SURFACE_AUX_MODE_MCS 1 -#define GFX8_SURFACE_AUX_MODE_APPEND 2 -#define GFX8_SURFACE_AUX_MODE_HIZ 3 -#define GFX9_SURFACE_AUX_MODE_CCS_E 5 - -/* Surface state DW7 */ -#define GFX9_SURFACE_RT_COMPRESSION_SHIFT 30 -#define GFX9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30) -#define GFX7_SURFACE_CLEAR_COLOR_SHIFT 28 -#define GFX7_SURFACE_SCS_R_SHIFT 25 -#define GFX7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) -#define GFX7_SURFACE_SCS_G_SHIFT 22 -#define GFX7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22) -#define GFX7_SURFACE_SCS_B_SHIFT 19 -#define GFX7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19) -#define GFX7_SURFACE_SCS_A_SHIFT 16 -#define GFX7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16) - -/* The actual swizzle values/what channel to use */ -#define HSW_SCS_ZERO 0 -#define HSW_SCS_ONE 1 -#define HSW_SCS_RED 4 -#define HSW_SCS_GREEN 5 -#define HSW_SCS_BLUE 6 -#define HSW_SCS_ALPHA 7 - -/* SAMPLER_STATE DW0 */ -#define BRW_SAMPLER_DISABLE (1 << 31) -#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE (1 << 28) -#define GFX6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gfx6 only */ -#define BRW_SAMPLER_BASE_MIPLEVEL_MASK INTEL_MASK(26, 22) -#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT 22 -#define BRW_SAMPLER_MIP_FILTER_MASK INTEL_MASK(21, 20) -#define BRW_SAMPLER_MIP_FILTER_SHIFT 20 -#define BRW_SAMPLER_MAG_FILTER_MASK INTEL_MASK(19, 17) -#define BRW_SAMPLER_MAG_FILTER_SHIFT 17 -#define BRW_SAMPLER_MIN_FILTER_MASK INTEL_MASK(16, 14) -#define BRW_SAMPLER_MIN_FILTER_SHIFT 14 -#define GFX4_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 3) -#define GFX4_SAMPLER_LOD_BIAS_SHIFT 3 -#define GFX4_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(2, 0) -#define GFX4_SAMPLER_SHADOW_FUNCTION_SHIFT 0 - -#define GFX7_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 1) -#define GFX7_SAMPLER_LOD_BIAS_SHIFT 1 -#define GFX7_SAMPLER_EWA_ANISOTROPIC_ALGORITHM (1 << 0) - -/* SAMPLER_STATE DW1 */ -#define GFX4_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 22) -#define 
GFX4_SAMPLER_MIN_LOD_SHIFT 22 -#define GFX4_SAMPLER_MAX_LOD_MASK INTEL_MASK(21, 12) -#define GFX4_SAMPLER_MAX_LOD_SHIFT 12 -#define GFX4_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 9) -/* Wrap modes are in DW1 on Gfx4-6 and DW3 on Gfx7+ */ -#define BRW_SAMPLER_TCX_WRAP_MODE_MASK INTEL_MASK(8, 6) -#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT 6 -#define BRW_SAMPLER_TCY_WRAP_MODE_MASK INTEL_MASK(5, 3) -#define BRW_SAMPLER_TCY_WRAP_MODE_SHIFT 3 -#define BRW_SAMPLER_TCZ_WRAP_MODE_MASK INTEL_MASK(2, 0) -#define BRW_SAMPLER_TCZ_WRAP_MODE_SHIFT 0 - -#define GFX7_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 20) -#define GFX7_SAMPLER_MIN_LOD_SHIFT 20 -#define GFX7_SAMPLER_MAX_LOD_MASK INTEL_MASK(19, 8) -#define GFX7_SAMPLER_MAX_LOD_SHIFT 8 -#define GFX7_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(3, 1) -#define GFX7_SAMPLER_SHADOW_FUNCTION_SHIFT 1 -#define GFX7_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 0) - -/* SAMPLER_STATE DW2 - border color pointer */ - -/* SAMPLER_STATE DW3 */ -#define BRW_SAMPLER_MAX_ANISOTROPY_MASK INTEL_MASK(21, 19) -#define BRW_SAMPLER_MAX_ANISOTROPY_SHIFT 19 -#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK INTEL_MASK(18, 13) -#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT 13 -#define GFX7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10) -/* Gfx7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */ -#define GFX6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0) - -enum brw_wrap_mode { - BRW_TEXCOORDMODE_WRAP = 0, - BRW_TEXCOORDMODE_MIRROR = 1, - BRW_TEXCOORDMODE_CLAMP = 2, - BRW_TEXCOORDMODE_CUBE = 3, - BRW_TEXCOORDMODE_CLAMP_BORDER = 4, - BRW_TEXCOORDMODE_MIRROR_ONCE = 5, - GFX8_TEXCOORDMODE_HALF_BORDER = 6, -}; - -#define BRW_THREAD_PRIORITY_NORMAL 0 -#define BRW_THREAD_PRIORITY_HIGH 1 - -#define BRW_TILEWALK_XMAJOR 0 -#define BRW_TILEWALK_YMAJOR 1 - -#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 -#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 - - -#define CMD_URB_FENCE 0x6000 -#define CMD_CS_URB_STATE 0x6001 -#define CMD_CONST_BUFFER 0x6002 - -#define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_SIP 0x6102 -#define CMD_PIPELINE_SELECT_965 0x6104 -#define CMD_PIPELINE_SELECT_GM45 0x6904 - -#define _3DSTATE_PIPELINED_POINTERS 0x7800 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 -# define GFX6_BINDING_TABLE_MODIFY_VS (1 << 8) -# define GFX6_BINDING_TABLE_MODIFY_GS (1 << 9) -# define GFX6_BINDING_TABLE_MODIFY_PS (1 << 12) - -#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GFX7+ */ - -#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GFX6+ */ -# define PS_SAMPLER_STATE_CHANGE (1 << 12) -# define GS_SAMPLER_STATE_CHANGE (1 << 9) -# define VS_SAMPLER_STATE_CHANGE (1 << 8) -/* DW1: VS */ -/* DW2: GS */ -/* DW3: PS */ - -#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_HS 0x782C /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_DS 0x782D /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GFX7+ */ - -#define _3DSTATE_VERTEX_BUFFERS 0x7808 -# define BRW_VB0_INDEX_SHIFT 27 -# define GFX6_VB0_INDEX_SHIFT 26 -# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) -# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) -# define GFX6_VB0_ACCESS_VERTEXDATA (0 << 20) -# define GFX6_VB0_ACCESS_INSTANCEDATA (1 << 20) -# define GFX7_VB0_ADDRESS_MODIFYENABLE (1 
<< 14) -# define BRW_VB0_PITCH_SHIFT 0 - -#define _3DSTATE_VERTEX_ELEMENTS 0x7809 -# define BRW_VE0_INDEX_SHIFT 27 -# define GFX6_VE0_INDEX_SHIFT 26 -# define BRW_VE0_FORMAT_SHIFT 16 -# define BRW_VE0_VALID (1 << 26) -# define GFX6_VE0_VALID (1 << 25) -# define GFX6_VE0_EDGE_FLAG_ENABLE (1 << 15) -# define BRW_VE0_SRC_OFFSET_SHIFT 0 -# define BRW_VE1_COMPONENT_NOSTORE 0 -# define BRW_VE1_COMPONENT_STORE_SRC 1 -# define BRW_VE1_COMPONENT_STORE_0 2 -# define BRW_VE1_COMPONENT_STORE_1_FLT 3 -# define BRW_VE1_COMPONENT_STORE_1_INT 4 -# define BRW_VE1_COMPONENT_STORE_VID 5 -# define BRW_VE1_COMPONENT_STORE_IID 6 -# define BRW_VE1_COMPONENT_STORE_PID 7 -# define BRW_VE1_COMPONENT_0_SHIFT 28 -# define BRW_VE1_COMPONENT_1_SHIFT 24 -# define BRW_VE1_COMPONENT_2_SHIFT 20 -# define BRW_VE1_COMPONENT_3_SHIFT 16 -# define BRW_VE1_DST_OFFSET_SHIFT 0 - -#define CMD_INDEX_BUFFER 0x780a -#define GFX4_3DSTATE_VF_STATISTICS 0x780b -#define GM45_3DSTATE_VF_STATISTICS 0x680b -#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GFX6+ */ -#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GFX7+ */ -#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GFX7+ */ - -#define _3DSTATE_URB 0x7805 /* GFX6 */ -# define GFX6_URB_VS_SIZE_SHIFT 16 -# define GFX6_URB_VS_ENTRIES_SHIFT 0 -# define GFX6_URB_GS_ENTRIES_SHIFT 8 -# define GFX6_URB_GS_SIZE_SHIFT 0 - -#define _3DSTATE_VF 0x780c /* GFX7.5+ */ -#define HSW_CUT_INDEX_ENABLE (1 << 8) - -#define _3DSTATE_VF_INSTANCING 0x7849 /* GFX8+ */ -# define GFX8_VF_INSTANCING_ENABLE (1 << 8) - -#define _3DSTATE_VF_SGVS 0x784a /* GFX8+ */ -# define GFX8_SGVS_ENABLE_INSTANCE_ID (1 << 31) -# define GFX8_SGVS_INSTANCE_ID_COMPONENT_SHIFT 29 -# define GFX8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16 -# define GFX8_SGVS_ENABLE_VERTEX_ID (1 << 15) -# define GFX8_SGVS_VERTEX_ID_COMPONENT_SHIFT 13 -# define GFX8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0 - -#define _3DSTATE_VF_TOPOLOGY 0x784b /* GFX8+ */ - -#define _3DSTATE_WM_CHROMAKEY 0x784c /* GFX8+ */ - -#define _3DSTATE_URB_VS 0x7830 /* GFX7+ */ -#define _3DSTATE_URB_HS 0x7831 /* GFX7+ */ -#define _3DSTATE_URB_DS 0x7832 /* GFX7+ */ -#define _3DSTATE_URB_GS 0x7833 /* GFX7+ */ -# define GFX7_URB_ENTRY_SIZE_SHIFT 16 -# define GFX7_URB_STARTING_ADDRESS_SHIFT 25 - -#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_HS 0x7913 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_DS 0x7914 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GFX7+ */ -# define GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 - -#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GFX6+ */ -# define GFX6_CC_VIEWPORT_MODIFY (1 << 12) -# define GFX6_SF_VIEWPORT_MODIFY (1 << 11) -# define GFX6_CLIP_VIEWPORT_MODIFY (1 << 10) -# define GFX6_NUM_VIEWPORTS 16 - -#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GFX7+ */ -#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GFX7+ */ - -#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GFX6+ */ - -#define _3DSTATE_VS 0x7810 /* GFX6+ */ -/* DW2 */ -# define GFX6_VS_SPF_MODE (1 << 31) -# define GFX6_VS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX6_VS_SAMPLER_COUNT_SHIFT 27 -# define GFX6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX6_VS_FLOATING_POINT_MODE_ALT (1 << 16) -# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12) -/* DW4 */ -# define GFX6_VS_DISPATCH_START_GRF_SHIFT 20 -# define GFX6_VS_URB_READ_LENGTH_SHIFT 11 -# define GFX6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW5 
*/ -# define GFX6_VS_MAX_THREADS_SHIFT 25 -# define HSW_VS_MAX_THREADS_SHIFT 23 -# define GFX6_VS_STATISTICS_ENABLE (1 << 10) -# define GFX6_VS_CACHE_DISABLE (1 << 1) -# define GFX6_VS_ENABLE (1 << 0) -/* Gfx8+ DW7 */ -# define GFX8_VS_SIMD8_ENABLE (1 << 2) -/* Gfx8+ DW8 */ -# define GFX8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 -# define GFX8_VS_URB_OUTPUT_LENGTH_SHIFT 16 -# define GFX8_VS_USER_CLIP_DISTANCE_SHIFT 8 - -#define _3DSTATE_GS 0x7811 /* GFX6+ */ -/* DW2 */ -# define GFX6_GS_SPF_MODE (1 << 31) -# define GFX6_GS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX6_GS_SAMPLER_COUNT_SHIFT 27 -# define GFX6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX6_GS_FLOATING_POINT_MODE_ALT (1 << 16) -# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12) -/* DW4 */ -# define GFX7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23 -# define GFX7_GS_OUTPUT_TOPOLOGY_SHIFT 17 -# define GFX6_GS_URB_READ_LENGTH_SHIFT 11 -# define GFX7_GS_INCLUDE_VERTEX_HANDLES (1 << 10) -# define GFX6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 -# define GFX6_GS_DISPATCH_START_GRF_SHIFT 0 -/* DW5 */ -# define GFX6_GS_MAX_THREADS_SHIFT 25 -# define HSW_GS_MAX_THREADS_SHIFT 24 -# define IVB_GS_CONTROL_DATA_FORMAT_SHIFT 24 -# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0 -# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 -# define GFX7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 -# define GFX7_GS_INSTANCE_CONTROL_SHIFT 15 -# define GFX7_GS_DISPATCH_MODE_SHIFT 11 -# define GFX7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) -# define GFX6_GS_STATISTICS_ENABLE (1 << 10) -# define GFX6_GS_SO_STATISTICS_ENABLE (1 << 9) -# define GFX6_GS_RENDERING_ENABLE (1 << 8) -# define GFX7_GS_INCLUDE_PRIMITIVE_ID (1 << 4) -# define GFX7_GS_REORDER_TRAILING (1 << 2) -# define GFX7_GS_ENABLE (1 << 0) -/* DW6 */ -# define HSW_GS_CONTROL_DATA_FORMAT_SHIFT 31 -# define GFX6_GS_REORDER (1 << 30) -# define GFX6_GS_DISCARD_ADJACENCY (1 << 29) -# define GFX6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) -# define GFX6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27) -# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16 -# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) -# define GFX6_GS_ENABLE (1 << 15) - -/* Gfx8+ DW8 */ -# define GFX8_GS_STATIC_OUTPUT (1 << 30) -# define GFX8_GS_STATIC_VERTEX_COUNT_SHIFT 16 -# define GFX8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16) - -/* Gfx8+ DW9 */ -# define GFX8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 -# define GFX8_GS_URB_OUTPUT_LENGTH_SHIFT 16 -# define GFX8_GS_USER_CLIP_DISTANCE_SHIFT 8 - -# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) -# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) - -#define _3DSTATE_HS 0x781B /* GFX7+ */ -/* DW1 */ -# define GFX7_HS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) -# define GFX7_HS_SAMPLER_COUNT_SHIFT 27 -# define GFX7_HS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18) -# define GFX7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX7_HS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX7_HS_FLOATING_POINT_MODE_ALT (1 << 16) -# define GFX7_HS_MAX_THREADS_SHIFT 0 -/* DW2 */ -# define GFX7_HS_ENABLE (1 << 31) -# define GFX7_HS_STATISTICS_ENABLE (1 << 29) -# define GFX8_HS_MAX_THREADS_SHIFT 8 -# define GFX7_HS_INSTANCE_COUNT_MASK INTEL_MASK(3, 0) -# define GFX7_HS_INSTANCE_COUNT_SHIFT 0 -/* DW5 */ -# define GFX7_HS_SINGLE_PROGRAM_FLOW (1 << 27) -# define GFX7_HS_VECTOR_MASK_ENABLE (1 << 26) -# define HSW_HS_ACCESSES_UAV (1 << 25) -# define GFX7_HS_INCLUDE_VERTEX_HANDLES (1 << 24) -# define GFX7_HS_DISPATCH_START_GRF_MASK INTEL_MASK(23, 19) -# define GFX7_HS_DISPATCH_START_GRF_SHIFT 19 -# define 
GFX7_HS_URB_READ_LENGTH_MASK INTEL_MASK(16, 11) -# define GFX7_HS_URB_READ_LENGTH_SHIFT 11 -# define GFX7_HS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4) -# define GFX7_HS_URB_ENTRY_READ_OFFSET_SHIFT 4 - -#define _3DSTATE_TE 0x781C /* GFX7+ */ -/* DW1 */ -# define GFX7_TE_PARTITIONING_SHIFT 12 -# define GFX7_TE_OUTPUT_TOPOLOGY_SHIFT 8 -# define GFX7_TE_DOMAIN_SHIFT 4 -//# define GFX7_TE_MODE_SW (1 << 1) -# define GFX7_TE_ENABLE (1 << 0) - -#define _3DSTATE_DS 0x781D /* GFX7+ */ -/* DW2 */ -# define GFX7_DS_SINGLE_DOMAIN_POINT_DISPATCH (1 << 31) -# define GFX7_DS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX7_DS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) -# define GFX7_DS_SAMPLER_COUNT_SHIFT 27 -# define GFX7_DS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18) -# define GFX7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX7_DS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX7_DS_FLOATING_POINT_MODE_ALT (1 << 16) -# define HSW_DS_ACCESSES_UAV (1 << 14) -/* DW4 */ -# define GFX7_DS_DISPATCH_START_GRF_MASK INTEL_MASK(24, 20) -# define GFX7_DS_DISPATCH_START_GRF_SHIFT 20 -# define GFX7_DS_URB_READ_LENGTH_MASK INTEL_MASK(17, 11) -# define GFX7_DS_URB_READ_LENGTH_SHIFT 11 -# define GFX7_DS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4) -# define GFX7_DS_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW5 */ -# define GFX7_DS_MAX_THREADS_SHIFT 25 -# define HSW_DS_MAX_THREADS_SHIFT 21 -# define GFX7_DS_STATISTICS_ENABLE (1 << 10) -# define GFX7_DS_SIMD8_DISPATCH_ENABLE (1 << 3) -# define GFX7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2) -# define GFX7_DS_CACHE_DISABLE (1 << 1) -# define GFX7_DS_ENABLE (1 << 0) -/* Gfx8+ DW8 */ -# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21) -# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 -# define GFX8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16) -# define GFX8_DS_URB_OUTPUT_LENGTH_SHIFT 16 -# define GFX8_DS_USER_CLIP_DISTANCE_MASK INTEL_MASK(15, 8) -# define GFX8_DS_USER_CLIP_DISTANCE_SHIFT 8 -# define GFX8_DS_USER_CULL_DISTANCE_MASK INTEL_MASK(7, 0) -# define GFX8_DS_USER_CULL_DISTANCE_SHIFT 0 - - -#define _3DSTATE_CLIP 0x7812 /* GFX6+ */ -/* DW1 */ -# define GFX7_CLIP_WINDING_CW (0 << 20) -# define GFX7_CLIP_WINDING_CCW (1 << 20) -# define GFX7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) -# define GFX7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) -# define GFX7_CLIP_EARLY_CULL (1 << 18) -# define GFX8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK (1 << 17) -# define GFX7_CLIP_CULLMODE_BOTH (0 << 16) -# define GFX7_CLIP_CULLMODE_NONE (1 << 16) -# define GFX7_CLIP_CULLMODE_FRONT (2 << 16) -# define GFX7_CLIP_CULLMODE_BACK (3 << 16) -# define GFX6_CLIP_STATISTICS_ENABLE (1 << 10) -/** - * Just does cheap culling based on the clip distance. Bits must be - * disjoint with USER_CLIP_CLIP_DISTANCE bits. - */ -# define GFX6_USER_CLIP_CULL_DISTANCES_SHIFT 0 -/* DW2 */ -# define GFX6_CLIP_ENABLE (1 << 31) -# define GFX6_CLIP_API_OGL (0 << 30) -# define GFX6_CLIP_API_D3D (1 << 30) -# define GFX6_CLIP_XY_TEST (1 << 28) -# define GFX6_CLIP_Z_TEST (1 << 27) -# define GFX6_CLIP_GB_TEST (1 << 26) -/** 8-bit field of which user clip distances to clip against.
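 * For example, to clip against user clip distances 0 and 3 only (an
 * illustrative sketch, not part of the original file, of how this shift
 * is consumed when assembling DW2 by hand):
 *
 *   dw2 |= ((1 << 0) | (1 << 3)) << GFX6_USER_CLIP_CLIP_DISTANCES_SHIFT;
 *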
*/ -# define GFX6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 -# define GFX6_CLIP_MODE_NORMAL (0 << 13) -# define GFX6_CLIP_MODE_REJECT_ALL (3 << 13) -# define GFX6_CLIP_MODE_ACCEPT_ALL (4 << 13) -# define GFX6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) -# define GFX6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8) -# define GFX6_CLIP_TRI_PROVOKE_SHIFT 4 -# define GFX6_CLIP_LINE_PROVOKE_SHIFT 2 -# define GFX6_CLIP_TRIFAN_PROVOKE_SHIFT 0 -/* DW3 */ -# define GFX6_CLIP_MIN_POINT_WIDTH_SHIFT 17 -# define GFX6_CLIP_MAX_POINT_WIDTH_SHIFT 6 -# define GFX6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) -# define GFX6_CLIP_MAX_VP_INDEX_MASK INTEL_MASK(3, 0) - -#define _3DSTATE_SF 0x7813 /* GFX6+ */ -/* DW1 (for gfx6) */ -# define GFX6_SF_NUM_OUTPUTS_SHIFT 22 -# define GFX6_SF_SWIZZLE_ENABLE (1 << 21) -# define GFX6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) -# define GFX6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) -# define GFX9_SF_LINE_WIDTH_SHIFT 12 /* U11.7 */ -# define GFX6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GFX6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW2 */ -# define GFX6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) -# define GFX6_SF_STATISTICS_ENABLE (1 << 10) -# define GFX6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) -# define GFX6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) -# define GFX6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) -# define GFX6_SF_FRONT_SOLID (0 << 5) -# define GFX6_SF_FRONT_WIREFRAME (1 << 5) -# define GFX6_SF_FRONT_POINT (2 << 5) -# define GFX6_SF_BACK_SOLID (0 << 3) -# define GFX6_SF_BACK_WIREFRAME (1 << 3) -# define GFX6_SF_BACK_POINT (2 << 3) -# define GFX6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) -# define GFX6_SF_WINDING_CCW (1 << 0) -/* DW3 */ -# define GFX6_SF_LINE_AA_ENABLE (1 << 31) -# define GFX6_SF_CULL_BOTH (0 << 29) -# define GFX6_SF_CULL_NONE (1 << 29) -# define GFX6_SF_CULL_FRONT (2 << 29) -# define GFX6_SF_CULL_BACK (3 << 29) -# define GFX6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ -# define GFX6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) -# define GFX6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) -# define GFX6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) -# define GFX6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) -# define GFX6_SF_SCISSOR_ENABLE (1 << 11) -# define GFX6_SF_MSRAST_OFF_PIXEL (0 << 8) -# define GFX6_SF_MSRAST_OFF_PATTERN (1 << 8) -# define GFX6_SF_MSRAST_ON_PIXEL (2 << 8) -# define GFX6_SF_MSRAST_ON_PATTERN (3 << 8) -/* DW4 */ -# define GFX6_SF_TRI_PROVOKE_SHIFT 29 -# define GFX6_SF_LINE_PROVOKE_SHIFT 27 -# define GFX6_SF_TRIFAN_PROVOKE_SHIFT 25 -# define GFX6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) -# define GFX6_SF_LINE_AA_MODE_TRUE (1 << 14) -# define GFX6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) -# define GFX6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) -# define GFX6_SF_USE_STATE_POINT_WIDTH (1 << 11) -# define GFX6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ -/* DW5: depth offset constant */ -/* DW6: depth offset scale */ -/* DW7: depth offset clamp */ -/* DW8 */ -# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) -# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) -# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) -# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) -# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 -# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 -# define ATTRIBUTE_1_SOURCE_SHIFT 16 -# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) -# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) -# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) -# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) -# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 -# define ATTRIBUTE_CONST_0000 0 -# define ATTRIBUTE_CONST_0001_FLOAT 1 -# define ATTRIBUTE_CONST_1111_FLOAT 2 -# define ATTRIBUTE_CONST_PRIM_ID 3 -# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 -# define 
ATTRIBUTE_0_SOURCE_SHIFT 0 - -# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 -# define ATTRIBUTE_SWIZZLE_SHIFT 6 - -/* DW16: Point sprite texture coordinate enables */ -/* DW17: Constant interpolation enables */ -/* DW18: attr 0-7 wrap shortest enables */ -/* DW19: attr 8-16 wrap shortest enables */ - -/* On GFX7, many fields of 3DSTATE_SF were split out into a new command: - * 3DSTATE_SBE. The remaining fields live in different DWords, but retain - * the same bit-offset. The only new field: - */ -/* GFX7/DW1: */ -# define GFX7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 -/* GFX7/DW2: */ -# define HSW_SF_LINE_STIPPLE_ENABLE (1 << 14) - -# define GFX8_SF_SMOOTH_POINT_ENABLE (1 << 13) - -#define _3DSTATE_SBE 0x781F /* GFX7+ */ -/* DW1 */ -# define GFX8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29) -# define GFX8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28) -# define GFX7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) -# define GFX7_SBE_NUM_OUTPUTS_SHIFT 22 -# define GFX7_SBE_SWIZZLE_ENABLE (1 << 21) -# define GFX7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) -# define GFX7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GFX7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 -# define GFX8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 -/* DW2-9: Attribute setup (same as DW8-15 of gfx6 _3DSTATE_SF) */ -/* DW10: Point sprite texture coordinate enables */ -/* DW11: Constant interpolation enables */ -/* DW12: attr 0-7 wrap shortest enables */ -/* DW13: attr 8-16 wrap shortest enables */ - -/* DW4-5: Attribute active components (gfx9) */ -#define GFX9_SBE_ACTIVE_COMPONENT_NONE 0 -#define GFX9_SBE_ACTIVE_COMPONENT_XY 1 -#define GFX9_SBE_ACTIVE_COMPONENT_XYZ 2 -#define GFX9_SBE_ACTIVE_COMPONENT_XYZW 3 - -#define _3DSTATE_SBE_SWIZ 0x7851 /* GFX8+ */ - -#define _3DSTATE_RASTER 0x7850 /* GFX8+ */ -/* DW1 */ -# define GFX9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE (1 << 26) -# define GFX9_RASTER_CONSERVATIVE_RASTERIZATION_ENABLE (1 << 24) -# define GFX8_RASTER_FRONT_WINDING_CCW (1 << 21) -# define GFX8_RASTER_CULL_BOTH (0 << 16) -# define GFX8_RASTER_CULL_NONE (1 << 16) -# define GFX8_RASTER_CULL_FRONT (2 << 16) -# define GFX8_RASTER_CULL_BACK (3 << 16) -# define GFX8_RASTER_SMOOTH_POINT_ENABLE (1 << 13) -# define GFX8_RASTER_API_MULTISAMPLE_ENABLE (1 << 12) -# define GFX8_RASTER_LINE_AA_ENABLE (1 << 2) -# define GFX8_RASTER_SCISSOR_ENABLE (1 << 1) -# define GFX8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0) -# define GFX9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE (1 << 0) - -/* Gfx8 BLEND_STATE */ -/* DW0 */ -#define GFX8_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) -#define GFX8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 30) -#define GFX8_BLEND_ALPHA_TO_ONE_ENABLE (1 << 29) -#define GFX8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE (1 << 28) -#define GFX8_BLEND_ALPHA_TEST_ENABLE (1 << 27) -#define GFX8_BLEND_ALPHA_TEST_FUNCTION_MASK INTEL_MASK(26, 24) -#define GFX8_BLEND_ALPHA_TEST_FUNCTION_SHIFT 24 -#define GFX8_BLEND_COLOR_DITHER_ENABLE (1 << 23) -#define GFX8_BLEND_X_DITHER_OFFSET_MASK INTEL_MASK(22, 21) -#define GFX8_BLEND_X_DITHER_OFFSET_SHIFT 21 -#define GFX8_BLEND_Y_DITHER_OFFSET_MASK INTEL_MASK(20, 19) -#define GFX8_BLEND_Y_DITHER_OFFSET_SHIFT 19 -/* DW1 + 2n */ -#define GFX8_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 31) -#define GFX8_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(30, 26) -#define GFX8_BLEND_SRC_BLEND_FACTOR_SHIFT 26 -#define GFX8_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(25, 21) -#define GFX8_BLEND_DST_BLEND_FACTOR_SHIFT 21 -#define 
GFX8_BLEND_COLOR_BLEND_FUNCTION_MASK INTEL_MASK(20, 18) -#define GFX8_BLEND_COLOR_BLEND_FUNCTION_SHIFT 18 -#define GFX8_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(17, 13) -#define GFX8_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 13 -#define GFX8_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(12, 8) -#define GFX8_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 8 -#define GFX8_BLEND_ALPHA_BLEND_FUNCTION_MASK INTEL_MASK(7, 5) -#define GFX8_BLEND_ALPHA_BLEND_FUNCTION_SHIFT 5 -#define GFX8_BLEND_WRITE_DISABLE_ALPHA (1 << 3) -#define GFX8_BLEND_WRITE_DISABLE_RED (1 << 2) -#define GFX8_BLEND_WRITE_DISABLE_GREEN (1 << 1) -#define GFX8_BLEND_WRITE_DISABLE_BLUE (1 << 0) -/* DW1 + 2n + 1 */ -#define GFX8_BLEND_LOGIC_OP_ENABLE (1 << 31) -#define GFX8_BLEND_LOGIC_OP_FUNCTION_MASK INTEL_MASK(30, 27) -#define GFX8_BLEND_LOGIC_OP_FUNCTION_SHIFT 27 -#define GFX8_BLEND_PRE_BLEND_SRC_ONLY_CLAMP_ENABLE (1 << 4) -#define GFX8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT (2 << 2) -#define GFX8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE (1 << 1) -#define GFX8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE (1 << 0) - -#define _3DSTATE_WM_HZ_OP 0x7852 /* GFX8+ */ -/* DW1 */ -# define GFX8_WM_HZ_STENCIL_CLEAR (1 << 31) -# define GFX8_WM_HZ_DEPTH_CLEAR (1 << 30) -# define GFX8_WM_HZ_DEPTH_RESOLVE (1 << 28) -# define GFX8_WM_HZ_HIZ_RESOLVE (1 << 27) -# define GFX8_WM_HZ_PIXEL_OFFSET_ENABLE (1 << 26) -# define GFX8_WM_HZ_FULL_SURFACE_DEPTH_CLEAR (1 << 25) -# define GFX8_WM_HZ_STENCIL_CLEAR_VALUE_MASK INTEL_MASK(23, 16) -# define GFX8_WM_HZ_STENCIL_CLEAR_VALUE_SHIFT 16 -# define GFX8_WM_HZ_NUM_SAMPLES_MASK INTEL_MASK(15, 13) -# define GFX8_WM_HZ_NUM_SAMPLES_SHIFT 13 -/* DW2 */ -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_MASK INTEL_MASK(31, 16) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_SHIFT 16 -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MIN_MASK INTEL_MASK(15, 0) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MIN_SHIFT 0 -/* DW3 */ -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_MASK INTEL_MASK(31, 16) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_SHIFT 16 -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MAX_MASK INTEL_MASK(15, 0) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MAX_SHIFT 0 -/* DW4 */ -# define GFX8_WM_HZ_SAMPLE_MASK_MASK INTEL_MASK(15, 0) -# define GFX8_WM_HZ_SAMPLE_MASK_SHIFT 0 - - -#define _3DSTATE_PS_BLEND 0x784D /* GFX8+ */ -/* DW1 */ -# define GFX8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) -# define GFX8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30) -# define GFX8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29) -# define GFX8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24) -# define GFX8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24 -# define GFX8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19) -# define GFX8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19 -# define GFX8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14) -# define GFX8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14 -# define GFX8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9) -# define GFX8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9 -# define GFX8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) -# define GFX8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) - -#define _3DSTATE_WM_DEPTH_STENCIL 0x784E /* GFX8+ */ -/* DW1 */ -# define GFX8_WM_DS_STENCIL_FAIL_OP_SHIFT 29 -# define GFX8_WM_DS_Z_FAIL_OP_SHIFT 26 -# define GFX8_WM_DS_Z_PASS_OP_SHIFT 23 -# define GFX8_WM_DS_BF_STENCIL_FUNC_SHIFT 20 -# define GFX8_WM_DS_BF_STENCIL_FAIL_OP_SHIFT 17 -# define GFX8_WM_DS_BF_Z_FAIL_OP_SHIFT 14 -# define GFX8_WM_DS_BF_Z_PASS_OP_SHIFT 11 -# define GFX8_WM_DS_STENCIL_FUNC_SHIFT 8 -# define GFX8_WM_DS_DEPTH_FUNC_SHIFT 5 -# define GFX8_WM_DS_DOUBLE_SIDED_STENCIL_ENABLE (1 << 
4) -# define GFX8_WM_DS_STENCIL_TEST_ENABLE (1 << 3) -# define GFX8_WM_DS_STENCIL_BUFFER_WRITE_ENABLE (1 << 2) -# define GFX8_WM_DS_DEPTH_TEST_ENABLE (1 << 1) -# define GFX8_WM_DS_DEPTH_BUFFER_WRITE_ENABLE (1 << 0) -/* DW2 */ -# define GFX8_WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) -# define GFX8_WM_DS_STENCIL_TEST_MASK_SHIFT 24 -# define GFX8_WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) -# define GFX8_WM_DS_STENCIL_WRITE_MASK_SHIFT 16 -# define GFX8_WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) -# define GFX8_WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 -# define GFX8_WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) -# define GFX8_WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0 -/* DW3 */ -# define GFX9_WM_DS_STENCIL_REF_MASK INTEL_MASK(15, 8) -# define GFX9_WM_DS_STENCIL_REF_SHIFT 8 -# define GFX9_WM_DS_BF_STENCIL_REF_MASK INTEL_MASK(7, 0) -# define GFX9_WM_DS_BF_STENCIL_REF_SHIFT 0 - -enum brw_pixel_shader_coverage_mask_mode { - BRW_PSICMS_OFF = 0, /* PS does not use input coverage masks. */ - BRW_PSICMS_NORMAL = 1, /* Input Coverage masks based on outer conservatism - * and factors in SAMPLE_MASK. If Pixel is - * conservatively covered, all samples are enabled. - */ - - BRW_PSICMS_INNER = 2, /* Input Coverage masks based on inner conservatism - * and factors in SAMPLE_MASK. If Pixel is - * conservatively *FULLY* covered, all samples are - * enabled. - */ - BRW_PCICMS_DEPTH = 3, -}; - -#define _3DSTATE_PS_EXTRA 0x784F /* GFX8+ */ -/* DW1 */ -# define GFX8_PSX_PIXEL_SHADER_VALID (1 << 31) -# define GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30) -# define GFX8_PSX_OMASK_TO_RENDER_TARGET (1 << 29) -# define GFX8_PSX_KILL_ENABLE (1 << 28) -# define GFX8_PSX_COMPUTED_DEPTH_MODE_SHIFT 26 -# define GFX8_PSX_FORCE_COMPUTED_DEPTH (1 << 25) -# define GFX8_PSX_USES_SOURCE_DEPTH (1 << 24) -# define GFX8_PSX_USES_SOURCE_W (1 << 23) -# define GFX8_PSX_ATTRIBUTE_ENABLE (1 << 8) -# define GFX8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) -# define GFX8_PSX_SHADER_IS_PER_SAMPLE (1 << 6) -# define GFX9_PSX_SHADER_COMPUTES_STENCIL (1 << 5) -# define GFX9_PSX_SHADER_PULLS_BARY (1 << 3) -# define GFX8_PSX_SHADER_HAS_UAV (1 << 2) -# define GFX8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) -# define GFX9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT 0 - -#define _3DSTATE_WM 0x7814 /* GFX6+ */ -/* DW1: kernel pointer */ -/* DW2 */ -# define GFX6_WM_SPF_MODE (1 << 31) -# define GFX6_WM_VECTOR_MASK_ENABLE (1 << 30) -# define GFX6_WM_SAMPLER_COUNT_SHIFT 27 -# define GFX6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX6_WM_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW3: scratch space */ -/* DW4 */ -# define GFX6_WM_STATISTICS_ENABLE (1 << 31) -# define GFX6_WM_DEPTH_CLEAR (1 << 30) -# define GFX6_WM_DEPTH_RESOLVE (1 << 28) -# define GFX6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -# define GFX6_WM_DISPATCH_START_GRF_SHIFT_0 16 -# define GFX6_WM_DISPATCH_START_GRF_SHIFT_1 8 -# define GFX6_WM_DISPATCH_START_GRF_SHIFT_2 0 -/* DW5 */ -# define GFX6_WM_MAX_THREADS_SHIFT 25 -# define GFX6_WM_KILL_ENABLE (1 << 22) -# define GFX6_WM_COMPUTED_DEPTH (1 << 21) -# define GFX6_WM_USES_SOURCE_DEPTH (1 << 20) -# define GFX6_WM_DISPATCH_ENABLE (1 << 19) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) -# define GFX6_WM_LINE_AA_WIDTH_0_5 (0 << 14) -# define GFX6_WM_LINE_AA_WIDTH_1_0 (1 << 14) -# define GFX6_WM_LINE_AA_WIDTH_2_0 (2 
<< 14) -# define GFX6_WM_LINE_AA_WIDTH_4_0 (3 << 14) -# define GFX6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) -# define GFX6_WM_LINE_STIPPLE_ENABLE (1 << 11) -# define GFX6_WM_OMASK_TO_RENDER_TARGET (1 << 9) -# define GFX6_WM_USES_SOURCE_W (1 << 8) -# define GFX6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) -# define GFX6_WM_32_DISPATCH_ENABLE (1 << 2) -# define GFX6_WM_16_DISPATCH_ENABLE (1 << 1) -# define GFX6_WM_8_DISPATCH_ENABLE (1 << 0) -/* DW6 */ -# define GFX6_WM_NUM_SF_OUTPUTS_SHIFT 20 -# define GFX6_WM_POSOFFSET_NONE (0 << 18) -# define GFX6_WM_POSOFFSET_CENTROID (2 << 18) -# define GFX6_WM_POSOFFSET_SAMPLE (3 << 18) -# define GFX6_WM_POSITION_ZW_PIXEL (0 << 16) -# define GFX6_WM_POSITION_ZW_CENTROID (2 << 16) -# define GFX6_WM_POSITION_ZW_SAMPLE (3 << 16) -# define GFX6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) -# define GFX6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) -# define GFX6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) -# define GFX6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) -# define GFX6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) -# define GFX6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) -# define GFX6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10 -# define GFX6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) -# define GFX6_WM_MSRAST_OFF_PIXEL (0 << 1) -# define GFX6_WM_MSRAST_OFF_PATTERN (1 << 1) -# define GFX6_WM_MSRAST_ON_PIXEL (2 << 1) -# define GFX6_WM_MSRAST_ON_PATTERN (3 << 1) -# define GFX6_WM_MSDISPMODE_PERSAMPLE (0 << 0) -# define GFX6_WM_MSDISPMODE_PERPIXEL (1 << 0) -/* DW7: kernel 1 pointer */ -/* DW8: kernel 2 pointer */ - -#define _3DSTATE_CONSTANT_VS 0x7815 /* GFX6+ */ -#define _3DSTATE_CONSTANT_GS 0x7816 /* GFX6+ */ -#define _3DSTATE_CONSTANT_PS 0x7817 /* GFX6+ */ -# define GFX6_CONSTANT_BUFFER_3_ENABLE (1 << 15) -# define GFX6_CONSTANT_BUFFER_2_ENABLE (1 << 14) -# define GFX6_CONSTANT_BUFFER_1_ENABLE (1 << 13) -# define GFX6_CONSTANT_BUFFER_0_ENABLE (1 << 12) - -#define _3DSTATE_CONSTANT_HS 0x7819 /* GFX7+ */ -#define _3DSTATE_CONSTANT_DS 0x781A /* GFX7+ */ - -#define _3DSTATE_STREAMOUT 0x781e /* GFX7+ */ -/* DW1 */ -# define SO_FUNCTION_ENABLE (1 << 31) -# define SO_RENDERING_DISABLE (1 << 30) -/* This selects which incoming rendering stream goes down the pipeline. The - * rendering stream is 0 if not defined by special cases in the GS state. - */ -# define SO_RENDER_STREAM_SELECT_SHIFT 27 -# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27) -/* Controls reordering of TRISTRIP_* elements in stream output (not rendering). 
- */ -# define SO_REORDER_TRAILING (1 << 26) -/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */ -# define SO_STATISTICS_ENABLE (1 << 25) -# define SO_BUFFER_ENABLE(n) (1 << (8 + (n))) -/* DW2 */ -# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29 -# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29) -# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24 -# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24) -# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21 -# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21) -# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16 -# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16) -# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13 -# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13) -# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8 -# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8) -# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5 -# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5) -# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0 -# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0) - -/* 3DSTATE_WM for Gfx7 */ -/* DW1 */ -# define GFX7_WM_STATISTICS_ENABLE (1 << 31) -# define GFX7_WM_DEPTH_CLEAR (1 << 30) -# define GFX7_WM_DISPATCH_ENABLE (1 << 29) -# define GFX7_WM_DEPTH_RESOLVE (1 << 28) -# define GFX7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -# define GFX7_WM_KILL_ENABLE (1 << 25) -# define GFX7_WM_COMPUTED_DEPTH_MODE_SHIFT 23 -# define GFX7_WM_USES_SOURCE_DEPTH (1 << 20) -# define GFX7_WM_EARLY_DS_CONTROL_NORMAL (0 << 21) -# define GFX7_WM_EARLY_DS_CONTROL_PSEXEC (1 << 21) -# define GFX7_WM_EARLY_DS_CONTROL_PREPS (2 << 21) -# define GFX7_WM_USES_SOURCE_W (1 << 19) -# define GFX7_WM_POSITION_ZW_PIXEL (0 << 17) -# define GFX7_WM_POSITION_ZW_CENTROID (2 << 17) -# define GFX7_WM_POSITION_ZW_SAMPLE (3 << 17) -# define GFX7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11 -# define GFX7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) -# define GFX7_WM_LINE_AA_WIDTH_0_5 (0 << 6) -# define GFX7_WM_LINE_AA_WIDTH_1_0 (1 << 6) -# define GFX7_WM_LINE_AA_WIDTH_2_0 (2 << 6) -# define GFX7_WM_LINE_AA_WIDTH_4_0 (3 << 6) -# define GFX7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) -# define GFX7_WM_LINE_STIPPLE_ENABLE (1 << 3) -# define GFX7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) -# define GFX7_WM_MSRAST_OFF_PIXEL (0 << 0) -# define GFX7_WM_MSRAST_OFF_PATTERN (1 << 0) -# define GFX7_WM_MSRAST_ON_PIXEL (2 << 0) -# define GFX7_WM_MSRAST_ON_PATTERN (3 << 0) -/* DW2 */ -# define GFX7_WM_MSDISPMODE_PERSAMPLE (0 << 31) -# define GFX7_WM_MSDISPMODE_PERPIXEL (1 << 31) -# define HSW_WM_UAV_ONLY (1 << 30) - -#define _3DSTATE_PS 0x7820 /* GFX7+ */ -/* DW1: kernel pointer */ -/* DW2 */ -# define GFX7_PS_SPF_MODE (1 << 31) -# define GFX7_PS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX7_PS_SAMPLER_COUNT_SHIFT 27 -# define GFX7_PS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) -# define GFX7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX7_PS_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW3: scratch space */ -/* DW4 */ -# define IVB_PS_MAX_THREADS_SHIFT 24 -# define HSW_PS_MAX_THREADS_SHIFT 23 -# define HSW_PS_SAMPLE_MASK_SHIFT 12 -# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12) -# define GFX7_PS_PUSH_CONSTANT_ENABLE (1 << 11) -# define GFX7_PS_ATTRIBUTE_ENABLE (1 << 10) 
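As an aside for readers of this header: the paired _SHIFT/_MASK defines above are consumed through the INTEL_MASK/SET_FIELD/GET_FIELD helper macros declared near the top of brw_defines.h. The sketch below shows that packing pattern for DW4 of 3DSTATE_PS on Haswell; it is illustrative only, not part of the removed file, and the helper definitions are reproduced from memory, so treat them as assumptions.

#include <stdint.h>

/* Assumed helpers mirroring brw_defines.h: build a contiguous bitmask,
 * shift a value into a field (clamped to the field's mask), and extract
 * it again. */
#define INTEL_MASK(high, low) (((1u << ((high) - (low) + 1)) - 1) << (low))
#define SET_FIELD(value, field) (((value) << field##_SHIFT) & field##_MASK)
#define GET_FIELD(word, field) (((word) & field##_MASK) >> field##_SHIFT)

/* Field definitions as given above in this header. */
#define HSW_PS_SAMPLE_MASK_SHIFT 12
#define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
#define GFX7_PS_PUSH_CONSTANT_ENABLE (1 << 11)

/* Pack DW4 of 3DSTATE_PS: an 8-bit sample mask in bits 19:12 plus a
 * single-bit enable in bit 11. */
uint32_t
pack_ps_dw4(uint8_t sample_mask)
{
   uint32_t dw4 = 0;
   dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK); /* bits 19:12 */
   dw4 |= GFX7_PS_PUSH_CONSTANT_ENABLE;               /* bit 11 */
   /* GET_FIELD(dw4, HSW_PS_SAMPLE_MASK) recovers sample_mask. */
   return dw4;
}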
-# define GFX7_PS_OMASK_TO_RENDER_TARGET (1 << 9) -# define GFX7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) -# define GFX7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) -# define GFX7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) -# define GFX9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6) -# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) -# define GFX7_PS_POSOFFSET_NONE (0 << 3) -# define GFX7_PS_POSOFFSET_CENTROID (2 << 3) -# define GFX7_PS_POSOFFSET_SAMPLE (3 << 3) -# define GFX7_PS_32_DISPATCH_ENABLE (1 << 2) -# define GFX7_PS_16_DISPATCH_ENABLE (1 << 1) -# define GFX7_PS_8_DISPATCH_ENABLE (1 << 0) -/* DW5 */ -# define GFX7_PS_DISPATCH_START_GRF_SHIFT_0 16 -# define GFX7_PS_DISPATCH_START_GRF_SHIFT_1 8 -# define GFX7_PS_DISPATCH_START_GRF_SHIFT_2 0 -/* DW6: kernel 1 pointer */ -/* DW7: kernel 2 pointer */ - -#define _3DSTATE_SAMPLE_MASK 0x7818 /* GFX6+ */ - -#define _3DSTATE_DRAWING_RECTANGLE 0x7900 -#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 -#define _3DSTATE_CHROMA_KEY 0x7904 -#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GFX4-6 */ -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 -#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 -#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ - -#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ -/* DW1 */ -# define SVB_INDEX_SHIFT 29 -# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ -/* DW2: SVB index */ -/* DW3: SVB maximum index */ - -#define _3DSTATE_MULTISAMPLE 0x790d /* GFX6+ */ -#define GFX8_3DSTATE_MULTISAMPLE 0x780d /* GFX8+ */ -/* DW1 */ -# define MS_PIXEL_LOCATION_CENTER (0 << 4) -# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) -# define MS_NUMSAMPLES_1 (0 << 1) -# define MS_NUMSAMPLES_2 (1 << 1) -# define MS_NUMSAMPLES_4 (2 << 1) -# define MS_NUMSAMPLES_8 (3 << 1) -# define MS_NUMSAMPLES_16 (4 << 1) - -#define _3DSTATE_SAMPLE_PATTERN 0x791c - -#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ -#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ - -#define GFX7_3DSTATE_CLEAR_PARAMS 0x7804 -#define GFX7_3DSTATE_DEPTH_BUFFER 0x7805 -#define GFX7_3DSTATE_STENCIL_BUFFER 0x7806 -# define HSW_STENCIL_ENABLED (1 << 31) -#define GFX7_3DSTATE_HIER_DEPTH_BUFFER 0x7807 - -#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */ -# define GFX5_DEPTH_CLEAR_VALID (1 << 15) -/* DW1: depth clear value */ -/* DW2 */ -# define GFX7_DEPTH_CLEAR_VALID (1 << 0) - -#define _3DSTATE_SO_DECL_LIST 0x7917 /* GFX7+ */ -/* DW1 */ -# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12 -# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12) -# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8 -# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8) -# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4 -# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4) -# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0 -# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0) -/* DW2 */ -# define SO_NUM_ENTRIES_3_SHIFT 24 -# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24) -# define SO_NUM_ENTRIES_2_SHIFT 16 -# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16) -# define SO_NUM_ENTRIES_1_SHIFT 8 -# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8) -# define SO_NUM_ENTRIES_0_SHIFT 0 -# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0) - -/* SO_DECL DW0 */ -# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12 -# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12) -# define SO_DECL_HOLE_FLAG (1 << 11) -# define SO_DECL_REGISTER_INDEX_SHIFT 4 -# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4) -# define 
SO_DECL_COMPONENT_MASK_SHIFT 0 -# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0) - -#define _3DSTATE_SO_BUFFER 0x7918 /* GFX7+ */ -/* DW1 */ -# define GFX8_SO_BUFFER_ENABLE (1 << 31) -# define SO_BUFFER_INDEX_SHIFT 29 -# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29) -# define GFX8_SO_BUFFER_OFFSET_WRITE_ENABLE (1 << 21) -# define GFX8_SO_BUFFER_OFFSET_ADDRESS_ENABLE (1 << 20) -# define SO_BUFFER_PITCH_SHIFT 0 -# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0) -/* DW2: start address */ -/* DW3: end address. */ - -#define _3DSTATE_3D_MODE 0x791e -# define SLICE_HASHING_TABLE_ENABLE (1 << 6) -# define SLICE_HASHING_TABLE_ENABLE_MASK REG_MASK(1 << 6) - -#define _3DSTATE_SLICE_TABLE_STATE_POINTERS 0x7920 - -#define CMD_MI_FLUSH 0x0200 - -# define BLT_X_SHIFT 0 -# define BLT_X_MASK INTEL_MASK(15, 0) -# define BLT_Y_SHIFT 16 -# define BLT_Y_MASK INTEL_MASK(31, 16) - -#define GFX5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2)) -/* DW0 */ -# define GFX5_MI_COUNTER_SET_0 (0 << 6) -# define GFX5_MI_COUNTER_SET_1 (1 << 6) -/* DW1 */ -# define MI_COUNTER_ADDRESS_GTT (1 << 0) -/* DW2: a user-defined report ID (written to the buffer but can be anything) */ - -#define GFX6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2)) - -#define GFX8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2)) - -/* Maximum number of entries that can be addressed using a binding table - * pointer of type SURFTYPE_BUFFER - */ -#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) - -#define MEDIA_VFE_STATE 0x7000 -/* GFX7 DW2, GFX8+ DW3 */ -# define MEDIA_VFE_STATE_MAX_THREADS_SHIFT 16 -# define MEDIA_VFE_STATE_MAX_THREADS_MASK INTEL_MASK(31, 16) -# define MEDIA_VFE_STATE_URB_ENTRIES_SHIFT 8 -# define MEDIA_VFE_STATE_URB_ENTRIES_MASK INTEL_MASK(15, 8) -# define MEDIA_VFE_STATE_RESET_GTW_TIMER_SHIFT 7 -# define MEDIA_VFE_STATE_RESET_GTW_TIMER_MASK INTEL_MASK(7, 7) -# define MEDIA_VFE_STATE_BYPASS_GTW_SHIFT 6 -# define MEDIA_VFE_STATE_BYPASS_GTW_MASK INTEL_MASK(6, 6) -# define GFX7_MEDIA_VFE_STATE_GPGPU_MODE_SHIFT 2 -# define GFX7_MEDIA_VFE_STATE_GPGPU_MODE_MASK INTEL_MASK(2, 2) -/* GFX7 DW4, GFX8+ DW5 */ -# define MEDIA_VFE_STATE_URB_ALLOC_SHIFT 16 -# define MEDIA_VFE_STATE_URB_ALLOC_MASK INTEL_MASK(31, 16) -# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0 -# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0) - -#define MEDIA_CURBE_LOAD 0x7001 -#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 -/* GFX7 DW4, GFX8+ DW5 */ -# define MEDIA_CURBE_READ_LENGTH_SHIFT 16 -# define MEDIA_CURBE_READ_LENGTH_MASK INTEL_MASK(31, 16) -# define MEDIA_CURBE_READ_OFFSET_SHIFT 0 -# define MEDIA_CURBE_READ_OFFSET_MASK INTEL_MASK(15, 0) -/* GFX7 DW5, GFX8+ DW6 */ -# define MEDIA_BARRIER_ENABLE_SHIFT 21 -# define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21) -# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT 16 -# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK INTEL_MASK(20, 16) -# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 -# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) -# define GFX8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 -# define GFX8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) -/* GFX7 DW6, GFX8+ DW7 */ -# define CROSS_THREAD_READ_LENGTH_SHIFT 0 -# define CROSS_THREAD_READ_LENGTH_MASK INTEL_MASK(7, 0) -#define MEDIA_STATE_FLUSH 0x7004 -#define GPGPU_WALKER 0x7105 -/* GFX7 DW0 */ -# define GFX7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10) -# define GFX7_GPGPU_PREDICATE_ENABLE (1 << 8) -/* GFX8+ DW2 */ -# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0 -# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0) -/* GFX7 DW2, GFX8+ DW4 */ -# define 
GPGPU_WALKER_SIMD_SIZE_SHIFT 30 -# define GPGPU_WALKER_SIMD_SIZE_MASK INTEL_MASK(31, 30) -# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT 16 -# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK INTEL_MASK(21, 16) -# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT 8 -# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK INTEL_MASK(31, 8) -# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT 0 -# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK INTEL_MASK(5, 0) - -#define CMD_MI (0x0 << 29) -#define CMD_2D (0x2 << 29) -#define CMD_3D (0x3 << 29) - -#define MI_NOOP (CMD_MI | 0) - -#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) - -#define MI_FLUSH (CMD_MI | (4 << 23)) -#define FLUSH_MAP_CACHE (1 << 0) -#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) - -#define MI_STORE_DATA_IMM (CMD_MI | (0x20 << 23)) -#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23)) -#define MI_LOAD_REGISTER_REG (CMD_MI | (0x2A << 23)) - -#define MI_FLUSH_DW (CMD_MI | (0x26 << 23)) - -#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23)) -# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22) -# define MI_STORE_REGISTER_MEM_PREDICATE (1 << 21) - -/* Load a value from memory into a register. Only available on Gfx7+. */ -#define GFX7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23)) -# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22) - -/* Manipulate the predicate bit based on some register values. Only on Gfx7+ */ -#define GFX7_MI_PREDICATE (CMD_MI | (0xC << 23)) -# define MI_PREDICATE_LOADOP_KEEP (0 << 6) -# define MI_PREDICATE_LOADOP_LOAD (2 << 6) -# define MI_PREDICATE_LOADOP_LOADINV (3 << 6) -# define MI_PREDICATE_COMBINEOP_SET (0 << 3) -# define MI_PREDICATE_COMBINEOP_AND (1 << 3) -# define MI_PREDICATE_COMBINEOP_OR (2 << 3) -# define MI_PREDICATE_COMBINEOP_XOR (3 << 3) -# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0) -# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0) -# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0) -# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0) - -#define HSW_MI_MATH (CMD_MI | (0x1a << 23)) - -#define MI_MATH_ALU2(opcode, operand1, operand2) \ - ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) | \ - ((MI_MATH_OPERAND_##operand2) << 0) ) - -#define MI_MATH_ALU1(opcode, operand1) \ - ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) ) - -#define MI_MATH_ALU0(opcode) \ - ( ((MI_MATH_OPCODE_##opcode) << 20) ) - -#define MI_MATH_OPCODE_NOOP 0x000 -#define MI_MATH_OPCODE_LOAD 0x080 -#define MI_MATH_OPCODE_LOADINV 0x480 -#define MI_MATH_OPCODE_LOAD0 0x081 -#define MI_MATH_OPCODE_LOAD1 0x481 -#define MI_MATH_OPCODE_ADD 0x100 -#define MI_MATH_OPCODE_SUB 0x101 -#define MI_MATH_OPCODE_AND 0x102 -#define MI_MATH_OPCODE_OR 0x103 -#define MI_MATH_OPCODE_XOR 0x104 -#define MI_MATH_OPCODE_STORE 0x180 -#define MI_MATH_OPCODE_STOREINV 0x580 - -#define MI_MATH_OPERAND_R0 0x00 -#define MI_MATH_OPERAND_R1 0x01 -#define MI_MATH_OPERAND_R2 0x02 -#define MI_MATH_OPERAND_R3 0x03 -#define MI_MATH_OPERAND_R4 0x04 -#define MI_MATH_OPERAND_SRCA 0x20 -#define MI_MATH_OPERAND_SRCB 0x21 -#define MI_MATH_OPERAND_ACCU 0x31 -#define MI_MATH_OPERAND_ZF 0x32 -#define MI_MATH_OPERAND_CF 0x33 - -#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22)) - -#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22)) - -#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22)) - -#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22)) - -#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) -# define XY_TEXT_BYTE_PACKED (1 << 16) - -/* BR00 */ -#define XY_BLT_WRITE_ALPHA (1 << 21) -#define XY_BLT_WRITE_RGB (1 << 20) -#define XY_SRC_TILED (1 << 15) -#define 
XY_DST_TILED (1 << 11) - -/* BR00 */ -#define XY_FAST_SRC_TILED_64K (3 << 20) -#define XY_FAST_SRC_TILED_Y (2 << 20) -#define XY_FAST_SRC_TILED_X (1 << 20) - -#define XY_FAST_DST_TILED_64K (3 << 13) -#define XY_FAST_DST_TILED_Y (2 << 13) -#define XY_FAST_DST_TILED_X (1 << 13) - -/* BR13 */ -#define BR13_8 (0x0 << 24) -#define BR13_565 (0x1 << 24) -#define BR13_8888 (0x3 << 24) -#define BR13_16161616 (0x4 << 24) -#define BR13_32323232 (0x5 << 24) - -#define GFX6_SO_PRIM_STORAGE_NEEDED 0x2280 -#define GFX7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) - -#define GFX6_SO_NUM_PRIMS_WRITTEN 0x2288 -#define GFX7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) - -#define GFX7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4) - -#define TIMESTAMP 0x2358 - -#define BCS_SWCTRL 0x22200 -# define BCS_SWCTRL_SRC_Y (1 << 0) -# define BCS_SWCTRL_DST_Y (1 << 1) - -#define OACONTROL 0x2360 -# define OACONTROL_COUNTER_SELECT_SHIFT 2 -# define OACONTROL_ENABLE_COUNTERS (1 << 0) - -/* Auto-Draw / Indirect Registers */ -#define GFX7_3DPRIM_END_OFFSET 0x2420 -#define GFX7_3DPRIM_START_VERTEX 0x2430 -#define GFX7_3DPRIM_VERTEX_COUNT 0x2434 -#define GFX7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GFX7_3DPRIM_START_INSTANCE 0x243C -#define GFX7_3DPRIM_BASE_VERTEX 0x2440 - -/* Auto-Compute / Indirect Registers */ -#define GFX7_GPGPU_DISPATCHDIMX 0x2500 -#define GFX7_GPGPU_DISPATCHDIMY 0x2504 -#define GFX7_GPGPU_DISPATCHDIMZ 0x2508 - -#define GFX7_CACHE_MODE_0 0x7000 -#define GFX7_CACHE_MODE_1 0x7004 -# define GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -# define GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT (1 << 9) -# define GFX8_HIZ_NP_PMA_FIX_ENABLE (1 << 11) -# define GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13) -# define GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1) -# define GFX8_HIZ_PMA_MASK_BITS \ - REG_MASK(GFX8_HIZ_NP_PMA_FIX_ENABLE | GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE) -# define GFX11_DISABLE_REPACKING_FOR_COMPRESSION (1 << 15) - -#define GFX7_GT_MODE 0x7008 -# define GFX9_SUBSLICE_HASHING_8x8 (0 << 8) -# define GFX9_SUBSLICE_HASHING_16x4 (1 << 8) -# define GFX9_SUBSLICE_HASHING_8x4 (2 << 8) -# define GFX9_SUBSLICE_HASHING_16x16 (3 << 8) -# define GFX9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8) -# define GFX9_SLICE_HASHING_NORMAL (0 << 11) -# define GFX9_SLICE_HASHING_DISABLED (1 << 11) -# define GFX9_SLICE_HASHING_32x16 (2 << 11) -# define GFX9_SLICE_HASHING_32x32 (3 << 11) -# define GFX9_SLICE_HASHING_MASK_BITS REG_MASK(3 << 11) - -/* Predicate registers */ -#define MI_PREDICATE_SRC0 0x2400 -#define MI_PREDICATE_SRC1 0x2408 -#define MI_PREDICATE_DATA 0x2410 -#define MI_PREDICATE_RESULT 0x2418 -#define MI_PREDICATE_RESULT_1 0x241C -#define MI_PREDICATE_RESULT_2 0x2214 - -#define HSW_CS_GPR(n) (0x2600 + (n) * 8) - -/* L3 cache control registers. */ -#define GFX7_L3SQCREG1 0xb010 -/* L3SQ general and high priority credit initialization. 
*/ -# define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 -# define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 -# define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 -# define GFX7_L3SQCREG1_CONV_DC_UC (1 << 24) -# define GFX7_L3SQCREG1_CONV_IS_UC (1 << 25) -# define GFX7_L3SQCREG1_CONV_C_UC (1 << 26) -# define GFX7_L3SQCREG1_CONV_T_UC (1 << 27) - -#define GFX7_L3CNTLREG2 0xb020 -# define GFX7_L3CNTLREG2_SLM_ENABLE (1 << 0) -# define GFX7_L3CNTLREG2_URB_ALLOC_SHIFT 1 -# define GFX7_L3CNTLREG2_URB_ALLOC_MASK INTEL_MASK(6, 1) -# define GFX7_L3CNTLREG2_URB_LOW_BW (1 << 7) -# define GFX7_L3CNTLREG2_ALL_ALLOC_SHIFT 8 -# define GFX7_L3CNTLREG2_ALL_ALLOC_MASK INTEL_MASK(13, 8) -# define GFX7_L3CNTLREG2_RO_ALLOC_SHIFT 14 -# define GFX7_L3CNTLREG2_RO_ALLOC_MASK INTEL_MASK(19, 14) -# define GFX7_L3CNTLREG2_RO_LOW_BW (1 << 20) -# define GFX7_L3CNTLREG2_DC_ALLOC_SHIFT 21 -# define GFX7_L3CNTLREG2_DC_ALLOC_MASK INTEL_MASK(26, 21) -# define GFX7_L3CNTLREG2_DC_LOW_BW (1 << 27) - -#define GFX7_L3CNTLREG3 0xb024 -# define GFX7_L3CNTLREG3_IS_ALLOC_SHIFT 1 -# define GFX7_L3CNTLREG3_IS_ALLOC_MASK INTEL_MASK(6, 1) -# define GFX7_L3CNTLREG3_IS_LOW_BW (1 << 7) -# define GFX7_L3CNTLREG3_C_ALLOC_SHIFT 8 -# define GFX7_L3CNTLREG3_C_ALLOC_MASK INTEL_MASK(13, 8) -# define GFX7_L3CNTLREG3_C_LOW_BW (1 << 14) -# define GFX7_L3CNTLREG3_T_ALLOC_SHIFT 15 -# define GFX7_L3CNTLREG3_T_ALLOC_MASK INTEL_MASK(20, 15) -# define GFX7_L3CNTLREG3_T_LOW_BW (1 << 21) - -#define HSW_SCRATCH1 0xb038 -#define HSW_SCRATCH1_L3_ATOMIC_DISABLE (1 << 27) - -#define HSW_ROW_CHICKEN3 0xe49c -#define HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE (1 << 6) - -#define GFX8_L3CNTLREG 0x7034 -# define GFX8_L3CNTLREG_SLM_ENABLE (1 << 0) -# define GFX8_L3CNTLREG_URB_ALLOC_SHIFT 1 -# define GFX8_L3CNTLREG_URB_ALLOC_MASK INTEL_MASK(7, 1) -# define GFX8_L3CNTLREG_RO_ALLOC_SHIFT 11 -# define GFX8_L3CNTLREG_RO_ALLOC_MASK INTEL_MASK(17, 11) -# define GFX8_L3CNTLREG_DC_ALLOC_SHIFT 18 -# define GFX8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18) -# define GFX8_L3CNTLREG_ALL_ALLOC_SHIFT 25 -# define GFX8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25) -# define GFX8_L3CNTLREG_EDBC_NO_HANG (1 << 9) -# define GFX11_L3CNTLREG_USE_FULL_WAYS (1 << 10) - -#define GFX10_CACHE_MODE_SS 0x0e420 -#define GFX10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) - -#define INSTPM 0x20c0 -# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) - -#define CS_DEBUG_MODE2 0x20d8 /* Gfx9+ */ -# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) - -#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gfx9+ */ -# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7) -# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7) -# define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7) -# define GFX11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE (1 << 11) - -#define HALF_SLICE_CHICKEN7 0xE194 -# define TEXEL_OFFSET_FIX_ENABLE (1 << 1) -# define TEXEL_OFFSET_FIX_MASK REG_MASK(1 << 1) - -#define GFX11_SAMPLER_MODE 0xE18C -# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5) -# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5) - -#define CS_CHICKEN1 0x2580 /* Gfx9+ */ -# define GFX9_REPLAY_MODE_MIDBUFFER (0 << 0) -# define GFX9_REPLAY_MODE_MIDOBJECT (1 << 0) -# define GFX9_REPLAY_MODE_MASK REG_MASK(1 << 0) - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c deleted file mode 100644 index b3fcb5e..0000000 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby 
granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/glsl/ir_uniform.h" -#include "compiler/glsl/shader_cache.h" -#include "main/mtypes.h" -#include "util/blob.h" -#include "util/build_id.h" -#include "util/debug.h" -#include "util/disk_cache.h" -#include "util/macros.h" -#include "util/mesa-sha1.h" - -#include "compiler/brw_eu.h" -#include "dev/intel_debug.h" - -#include "brw_context.h" -#include "brw_program.h" -#include "brw_cs.h" -#include "brw_gs.h" -#include "brw_state.h" -#include "brw_vs.h" -#include "brw_wm.h" - -static bool -debug_enabled_for_stage(gl_shader_stage stage) -{ - static const uint64_t stage_debug_flags[] = { - DEBUG_VS, DEBUG_TCS, DEBUG_TES, DEBUG_GS, DEBUG_WM, DEBUG_CS, - }; - assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_debug_flags)); - return INTEL_DEBUG(stage_debug_flags[stage]); -} - -static void -intel_shader_sha1(struct gl_program *prog, gl_shader_stage stage, - void *key, unsigned char *out_sha1) -{ - char sha1_buf[41]; - unsigned char sha1[20]; - char manifest[256]; - int offset = 0; - - _mesa_sha1_format(sha1_buf, prog->sh.data->sha1); - offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf); - - _mesa_sha1_compute(key, brw_prog_key_size(stage), sha1); - _mesa_sha1_format(sha1_buf, sha1); - offset += snprintf(manifest + offset, sizeof(manifest) - offset, - "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage), - sha1_buf); - - _mesa_sha1_compute(manifest, strlen(manifest), out_sha1); -} - -static bool -read_blob_program_data(struct blob_reader *binary, struct gl_program *prog, - gl_shader_stage stage, const uint8_t **program, - struct brw_stage_prog_data *prog_data) -{ - return - brw_read_blob_program_data(binary, prog, stage, program, prog_data) && - (binary->current == binary->end); -} - -static bool -read_and_upload(struct brw_context *brw, struct disk_cache *cache, - struct gl_program *prog, gl_shader_stage stage) -{ - unsigned char binary_sha1[20]; - - union brw_any_prog_key prog_key; - - switch (stage) { - case MESA_SHADER_VERTEX: - brw_vs_populate_key(brw, &prog_key.vs); - break; - case MESA_SHADER_TESS_CTRL: - brw_tcs_populate_key(brw, &prog_key.tcs); - break; - case MESA_SHADER_TESS_EVAL: - brw_tes_populate_key(brw, &prog_key.tes); - break; - case MESA_SHADER_GEOMETRY: - brw_gs_populate_key(brw, &prog_key.gs); - break; - case MESA_SHADER_FRAGMENT: - brw_wm_populate_key(brw, &prog_key.wm); - break; - case MESA_SHADER_COMPUTE: - brw_cs_populate_key(brw, &prog_key.cs); - 
break; - default: - unreachable("Unsupported stage!"); - } - - /* We don't care what instance of the program it is for the disk cache hash - * lookup, so set the id to 0 for the sha1 hashing. program_string_id will - * be set below. - */ - prog_key.base.program_string_id = 0; - - intel_shader_sha1(prog, stage, &prog_key, binary_sha1); - - size_t buffer_size; - uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size); - if (buffer == NULL) { - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - char sha1_buf[41]; - _mesa_sha1_format(sha1_buf, binary_sha1); - fprintf(stderr, "No cached %s binary found for: %s\n", - _mesa_shader_stage_to_abbrev(stage), sha1_buf); - } - return false; - } - - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - char sha1_buf[41]; - _mesa_sha1_format(sha1_buf, binary_sha1); - fprintf(stderr, "attempting to populate bo cache with binary: %s\n", - sha1_buf); - } - - struct blob_reader binary; - blob_reader_init(&binary, buffer, buffer_size); - - const uint8_t *program; - struct brw_stage_prog_data *prog_data = - ralloc_size(NULL, sizeof(union brw_any_prog_data)); - if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) { - /* Something very bad has gone wrong; discard the item from the cache and - * rebuild from source. - */ - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "Error reading program from cache (invalid i965 " - "cache item)\n"); - } - - disk_cache_remove(cache, binary_sha1); - ralloc_free(prog_data); - free(buffer); - return false; - } - - enum brw_cache_id cache_id; - struct brw_stage_state *stage_state; - - switch (stage) { - case MESA_SHADER_VERTEX: - cache_id = BRW_CACHE_VS_PROG; - stage_state = &brw->vs.base; - break; - case MESA_SHADER_TESS_CTRL: - cache_id = BRW_CACHE_TCS_PROG; - stage_state = &brw->tcs.base; - break; - case MESA_SHADER_TESS_EVAL: - cache_id = BRW_CACHE_TES_PROG; - stage_state = &brw->tes.base; - break; - case MESA_SHADER_GEOMETRY: - cache_id = BRW_CACHE_GS_PROG; - stage_state = &brw->gs.base; - break; - case MESA_SHADER_FRAGMENT: - cache_id = BRW_CACHE_FS_PROG; - stage_state = &brw->wm.base; - break; - case MESA_SHADER_COMPUTE: - cache_id = BRW_CACHE_CS_PROG; - stage_state = &brw->cs.base; - break; - default: - unreachable("Unsupported stage!"); - } - - prog_key.base.program_string_id = brw_program(prog)->id; - - brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch); - - if (unlikely(debug_enabled_for_stage(stage))) { - fprintf(stderr, "NIR for %s program %d loaded from disk shader cache:\n", - _mesa_shader_stage_to_abbrev(stage), brw_program(prog)->id); - brw_program_deserialize_driver_blob(&brw->ctx, prog, stage); - nir_shader *nir = prog->nir; - nir_print_shader(nir, stderr); - fprintf(stderr, "Native code for %s %s shader %s from disk cache:\n", - nir->info.label ?
nir->info.label : "unnamed", - _mesa_shader_stage_to_string(nir->info.stage), nir->info.name); - brw_disassemble_with_labels(&brw->screen->devinfo, program, 0, - prog_data->program_size, stderr); - } - - brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage), - program, prog_data->program_size, prog_data, - brw_prog_data_size(stage), &stage_state->prog_offset, - &stage_state->prog_data); - - prog->program_written_to_cache = true; - - ralloc_free(prog_data); - free(buffer); - - return true; -} - -bool -brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage) -{ - struct disk_cache *cache = brw->ctx.Cache; - if (cache == NULL) - return false; - - struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage]; - if (prog == NULL) - return false; - - if (prog->sh.data->spirv) - return false; - - if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK) - goto fail; - - if (!read_and_upload(brw, cache, prog, stage)) - goto fail; - - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "read gen program from cache\n"); - } - - return true; - -fail: - prog->program_written_to_cache = false; - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "falling back to nir %s.\n", - _mesa_shader_stage_to_abbrev(prog->info.stage)); - } - - brw_program_deserialize_driver_blob(&brw->ctx, prog, stage); - - return false; -} - -static void -write_program_data(struct brw_context *brw, struct gl_program *prog, - void *key, struct brw_stage_prog_data *prog_data, - uint32_t prog_offset, struct disk_cache *cache, - gl_shader_stage stage) -{ - struct blob binary; - blob_init(&binary); - - const void *program_map = brw->cache.map + prog_offset; - /* TODO: Improve perf for non-LLC. It would be best to save it at program - * generation time when the program is in normal cached memory - * accessible to the CPU. Another easier change would be to use - * _mesa_streaming_load_memcpy to read from the program mapped memory.
*/ - brw_write_blob_program_data(&binary, stage, program_map, prog_data); - - unsigned char sha1[20]; - char buf[41]; - intel_shader_sha1(prog, stage, key, sha1); - _mesa_sha1_format(buf, sha1); - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "putting binary in cache: %s\n", buf); - } - - disk_cache_put(cache, sha1, binary.data, binary.size, NULL); - - prog->program_written_to_cache = true; - blob_finish(&binary); -} - -void -brw_disk_cache_write_render_programs(struct brw_context *brw) -{ - struct disk_cache *cache = brw->ctx.Cache; - if (cache == NULL) - return; - - struct gl_program *prog; - gl_shader_stage stage; - for (stage = MESA_SHADER_VERTEX; stage <= MESA_SHADER_FRAGMENT; stage++) { - prog = brw->ctx._Shader->CurrentProgram[stage]; - if (prog && prog->sh.data->spirv) - return; - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX]; - if (prog && !prog->program_written_to_cache) { - struct brw_vs_prog_key vs_key; - brw_vs_populate_key(brw, &vs_key); - vs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data, - brw->vs.base.prog_offset, cache, - MESA_SHADER_VERTEX); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; - if (prog && !prog->program_written_to_cache) { - struct brw_tcs_prog_key tcs_key; - brw_tcs_populate_key(brw, &tcs_key); - tcs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &tcs_key, brw->tcs.base.prog_data, - brw->tcs.base.prog_offset, cache, - MESA_SHADER_TESS_CTRL); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; - if (prog && !prog->program_written_to_cache) { - struct brw_tes_prog_key tes_key; - brw_tes_populate_key(brw, &tes_key); - tes_key.base.program_string_id = 0; - - write_program_data(brw, prog, &tes_key, brw->tes.base.prog_data, - brw->tes.base.prog_offset, cache, - MESA_SHADER_TESS_EVAL); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; - if (prog && !prog->program_written_to_cache) { - struct brw_gs_prog_key gs_key; - brw_gs_populate_key(brw, &gs_key); - gs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &gs_key, brw->gs.base.prog_data, - brw->gs.base.prog_offset, cache, - MESA_SHADER_GEOMETRY); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; - if (prog && !prog->program_written_to_cache) { - struct brw_wm_prog_key wm_key; - brw_wm_populate_key(brw, &wm_key); - wm_key.base.program_string_id = 0; - - write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data, - brw->wm.base.prog_offset, cache, - MESA_SHADER_FRAGMENT); - } -} - -void -brw_disk_cache_write_compute_program(struct brw_context *brw) -{ - struct disk_cache *cache = brw->ctx.Cache; - if (cache == NULL) - return; - - struct gl_program *prog = - brw->ctx._Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - - if (prog && prog->sh.data->spirv) - return; - - if (prog && !prog->program_written_to_cache) { - struct brw_cs_prog_key cs_key; - brw_cs_populate_key(brw, &cs_key); - cs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &cs_key, brw->cs.base.prog_data, - brw->cs.base.prog_offset, cache, - MESA_SHADER_COMPUTE); - } -} - -void -brw_disk_cache_init(struct brw_screen *screen) -{ -#ifdef ENABLE_SHADER_CACHE - if (INTEL_DEBUG(DEBUG_DISK_CACHE_DISABLE_MASK)) - return; - - /* array length: print length + null char + 1 extra to verify it is unused */ - char renderer[11]; - ASSERTED int len = snprintf(renderer, sizeof(renderer), "i965_%04x", - screen->deviceID); - assert(len == 
sizeof(renderer) - 2); - - const struct build_id_note *note = - build_id_find_nhdr_for_addr(brw_disk_cache_init); - assert(note && build_id_length(note) == 20 /* sha1 */); - - const uint8_t *id_sha1 = build_id_data(note); - assert(id_sha1); - - char timestamp[41]; - _mesa_sha1_format(timestamp, id_sha1); - - const uint64_t driver_flags = - brw_get_compiler_config_value(screen->compiler); - screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); -#endif -} diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c deleted file mode 100644 index 5d4f066..0000000 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ /dev/null @@ -1,1361 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -#include "main/arrayobj.h" -#include "main/blend.h" -#include "main/context.h" -#include "main/condrender.h" -#include "main/samplerobj.h" -#include "main/state.h" -#include "main/enums.h" -#include "main/macros.h" -#include "main/transformfeedback.h" -#include "main/framebuffer.h" -#include "main/varray.h" -#include "tnl/tnl.h" -#include "vbo/vbo.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" -#include "drivers/common/meta.h" -#include "util/bitscan.h" -#include "util/bitset.h" - -#include "brw_blorp.h" -#include "brw_draw.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_buffer_objects.h" - -#define FILE_DEBUG_FLAG DEBUG_PRIMS - - -static const GLenum reduced_prim[GL_POLYGON+1] = { - [GL_POINTS] = GL_POINTS, - [GL_LINES] = GL_LINES, - [GL_LINE_LOOP] = GL_LINES, - [GL_LINE_STRIP] = GL_LINES, - [GL_TRIANGLES] = GL_TRIANGLES, - [GL_TRIANGLE_STRIP] = GL_TRIANGLES, - [GL_TRIANGLE_FAN] = GL_TRIANGLES, - [GL_QUADS] = GL_TRIANGLES, - [GL_QUAD_STRIP] = GL_TRIANGLES, - [GL_POLYGON] = GL_TRIANGLES -}; - -/* When the primitive changes, set a state bit and re-validate. Not - * the nicest and would rather deal with this by having all the - * programs be immune to the active primitive (ie. cope with all - * possibilities). That may not be realistic however. 
- */ -static void -brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) -{ - struct gl_context *ctx = &brw->ctx; - uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode); - - DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); - - /* Slight optimization to avoid the GS program when not needed: - */ - if (prim->mode == GL_QUAD_STRIP && - ctx->Light.ShadeModel != GL_FLAT && - ctx->Polygon.FrontMode == GL_FILL && - ctx->Polygon.BackMode == GL_FILL) - hw_prim = _3DPRIM_TRISTRIP; - - if (prim->mode == GL_QUADS && prim->count == 4 && - ctx->Light.ShadeModel != GL_FLAT && - ctx->Polygon.FrontMode == GL_FILL && - ctx->Polygon.BackMode == GL_FILL) { - hw_prim = _3DPRIM_TRIFAN; - } - - if (hw_prim != brw->primitive) { - brw->primitive = hw_prim; - brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; - - if (reduced_prim[prim->mode] != brw->reduced_primitive) { - brw->reduced_primitive = reduced_prim[prim->mode]; - brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE; - } - } -} - -static void -gfx6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) -{ - const struct gl_context *ctx = &brw->ctx; - uint32_t hw_prim; - - DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); - - if (prim->mode == GL_PATCHES) { - hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices); - } else { - hw_prim = get_hw_prim_for_gl_prim(prim->mode); - } - - if (hw_prim != brw->primitive) { - brw->primitive = hw_prim; - brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; - if (prim->mode == GL_PATCHES) - brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE; - } -} - - -/** - * The hardware is capable of removing dangling vertices on its own; however, - * prior to Gfx6, we sometimes convert quads into trifans (and quad strips - * into tristrips), since pre-Gfx6 hardware requires a GS to render quads. - * This function manually trims dangling vertices from a draw call involving - * quads so that those dangling vertices won't get drawn when we convert to - * trifans/tristrips. - */ -static GLuint -trim(GLenum prim, GLuint length) -{ - if (prim == GL_QUAD_STRIP) - return length > 3 ? (length - length % 2) : 0; - else if (prim == GL_QUADS) - return length - length % 4; - else - return length; -} - - -static void -brw_emit_prim(struct brw_context *brw, - const struct _mesa_prim *prim, - uint32_t hw_prim, - bool is_indexed, - GLuint num_instances, GLuint base_instance, - struct brw_transform_feedback_object *xfb_obj, - unsigned stream, - bool is_indirect, - GLsizeiptr indirect_offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int verts_per_instance; - int vertex_access_type; - int indirect_flag; - - DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode), - prim->start, prim->count); - - int start_vertex_location = prim->start; - int base_vertex_location = prim->basevertex; - - if (is_indexed) { - vertex_access_type = devinfo->ver >= 7 ? - GFX7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : - GFX4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; - start_vertex_location += brw->ib.start_vertex_offset; - base_vertex_location += brw->vb.start_vertex_bias; - } else { - vertex_access_type = devinfo->ver >= 7 ? - GFX7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL : - GFX4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; - start_vertex_location += brw->vb.start_vertex_bias; - } - - /* We only need to trim the primitive count on pre-Gfx6. */ - if (devinfo->ver < 6) - verts_per_instance = trim(prim->mode, prim->count); - else - verts_per_instance = prim->count; - - /* If nothing to emit, just return. 
*/ - if (verts_per_instance == 0 && !is_indirect && !xfb_obj) - return; - - /* If we're set to always flush, do it before and after the primitive emit. - * We want to catch both missed flushes that hurt instruction/state cache - * and missed flushes of the render cache as it heads to other parts of - * the besides the draw code. - */ - if (brw->always_flush_cache) - brw_emit_mi_flush(brw); - - /* If indirect, emit a bunch of loads from the indirect BO. */ - if (xfb_obj) { - indirect_flag = GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE; - - brw_load_register_mem(brw, GFX7_3DPRIM_VERTEX_COUNT, - xfb_obj->prim_count_bo, - stream * sizeof(uint32_t)); - BEGIN_BATCH(9); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2)); - OUT_BATCH(GFX7_3DPRIM_INSTANCE_COUNT); - OUT_BATCH(num_instances); - OUT_BATCH(GFX7_3DPRIM_START_VERTEX); - OUT_BATCH(0); - OUT_BATCH(GFX7_3DPRIM_BASE_VERTEX); - OUT_BATCH(0); - OUT_BATCH(GFX7_3DPRIM_START_INSTANCE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else if (is_indirect) { - struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer; - struct brw_bo *bo = brw_bufferobj_buffer(brw, - brw_buffer_object(indirect_buffer), - indirect_offset, 5 * sizeof(GLuint), false); - - indirect_flag = GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE; - - brw_load_register_mem(brw, GFX7_3DPRIM_VERTEX_COUNT, bo, - indirect_offset + 0); - brw_load_register_mem(brw, GFX7_3DPRIM_INSTANCE_COUNT, bo, - indirect_offset + 4); - - brw_load_register_mem(brw, GFX7_3DPRIM_START_VERTEX, bo, - indirect_offset + 8); - if (is_indexed) { - brw_load_register_mem(brw, GFX7_3DPRIM_BASE_VERTEX, bo, - indirect_offset + 12); - brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, bo, - indirect_offset + 16); - } else { - brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, bo, - indirect_offset + 12); - brw_load_register_imm32(brw, GFX7_3DPRIM_BASE_VERTEX, 0); - } - } else { - indirect_flag = 0; - } - - BEGIN_BATCH(devinfo->ver >= 7 ? 7 : 6); - - if (devinfo->ver >= 7) { - const int predicate_enable = - (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) - ? 
GFX7_3DPRIM_PREDICATE_ENABLE : 0; - - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable); - OUT_BATCH(hw_prim | vertex_access_type); - } else { - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | - hw_prim << GFX4_3DPRIM_TOPOLOGY_TYPE_SHIFT | - vertex_access_type); - } - OUT_BATCH(verts_per_instance); - OUT_BATCH(start_vertex_location); - OUT_BATCH(num_instances); - OUT_BATCH(base_instance); - OUT_BATCH(base_vertex_location); - ADVANCE_BATCH(); - - if (brw->always_flush_cache) - brw_emit_mi_flush(brw); -} - - -static void -brw_clear_buffers(struct brw_context *brw) -{ - for (unsigned i = 0; i < brw->vb.nr_buffers; ++i) { - brw_bo_unreference(brw->vb.buffers[i].bo); - brw->vb.buffers[i].bo = NULL; - } - brw->vb.nr_buffers = 0; - - for (unsigned i = 0; i < brw->vb.nr_enabled; ++i) { - brw->vb.enabled[i]->buffer = -1; - } -#ifndef NDEBUG - for (unsigned i = 0; i < VERT_ATTRIB_MAX; i++) { - assert(brw->vb.inputs[i].buffer == -1); - } -#endif -} - - -static uint8_t get_wa_flags(const struct gl_vertex_format *glformat) -{ - uint8_t wa_flags = 0; - - switch (glformat->Type) { - case GL_FIXED: - wa_flags = glformat->Size; - break; - - case GL_INT_2_10_10_10_REV: - wa_flags |= BRW_ATTRIB_WA_SIGN; - FALLTHROUGH; - - case GL_UNSIGNED_INT_2_10_10_10_REV: - if (glformat->Format == GL_BGRA) - wa_flags |= BRW_ATTRIB_WA_BGRA; - - if (glformat->Normalized) - wa_flags |= BRW_ATTRIB_WA_NORMALIZE; - else if (!glformat->Integer) - wa_flags |= BRW_ATTRIB_WA_SCALE; - - break; - } - - return wa_flags; -} - - -static void -brw_merge_inputs(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - - if (devinfo->verx10 <= 70) { - /* Prior to Haswell, the hardware can't natively support GL_FIXED or - * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. - */ - const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; - const uint64_t vs_inputs = ctx->VertexProgram._Current->info.inputs_read; - assert((vs_inputs & ~((uint64_t)VERT_BIT_ALL)) == 0); - - unsigned vaomask = vs_inputs & _mesa_draw_array_bits(ctx); - while (vaomask) { - const gl_vert_attrib i = u_bit_scan(&vaomask); - const uint8_t wa_flags = - get_wa_flags(_mesa_draw_array_format(vao, i)); - - if (brw->vb.attrib_wa_flags[i] != wa_flags) { - brw->vb.attrib_wa_flags[i] = wa_flags; - brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS; - } - } - - unsigned currmask = vs_inputs & _mesa_draw_current_bits(ctx); - while (currmask) { - const gl_vert_attrib i = u_bit_scan(&currmask); - const uint8_t wa_flags = - get_wa_flags(_mesa_draw_current_format(ctx, i)); - - if (brw->vb.attrib_wa_flags[i] != wa_flags) { - brw->vb.attrib_wa_flags[i] = wa_flags; - brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS; - } - } - } -} - -/* Disable auxiliary buffers if a renderbuffer is also bound as a texture - * or shader image. This causes a self-dependency, where both rendering - * and sampling may concurrently read or write the CCS buffer, causing - * incorrect pixels. - */ -static bool -brw_disable_rb_aux_buffer(struct brw_context *brw, - bool *draw_aux_buffer_disabled, - struct brw_mipmap_tree *tex_mt, - unsigned min_level, unsigned num_levels, - const char *usage) -{ - const struct gl_framebuffer *fb = brw->ctx.DrawBuffer; - bool found = false; - - /* We only need to worry about color compression and fast clears. 
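The indirect register loads in brw_emit_prim() above walk the standard GL indirect draw records from ARB_draw_indirect; the byte offsets passed to brw_load_register_mem() line up with these fields (the struct and field names here are illustrative):

#include <stdint.h>

typedef struct {            /* glDrawElementsIndirect, 5 * sizeof(GLuint) */
   uint32_t count;          /* +0  -> GFX7_3DPRIM_VERTEX_COUNT   */
   uint32_t instance_count; /* +4  -> GFX7_3DPRIM_INSTANCE_COUNT */
   uint32_t first_index;    /* +8  -> GFX7_3DPRIM_START_VERTEX   */
   int32_t  base_vertex;    /* +12 -> GFX7_3DPRIM_BASE_VERTEX    */
   uint32_t base_instance;  /* +16 -> GFX7_3DPRIM_START_INSTANCE */
} indexed_indirect_cmd;

typedef struct {            /* glDrawArraysIndirect, 4 * sizeof(GLuint) */
   uint32_t count;          /* +0  -> GFX7_3DPRIM_VERTEX_COUNT   */
   uint32_t instance_count; /* +4  -> GFX7_3DPRIM_INSTANCE_COUNT */
   uint32_t first;          /* +8  -> GFX7_3DPRIM_START_VERTEX   */
   uint32_t base_instance;  /* +12 -> GFX7_3DPRIM_START_INSTANCE */
} array_indirect_cmd;

The non-indexed branch has no base-vertex field in the record, which is why the code loads START_INSTANCE from offset +12 and writes BASE_VERTEX as an immediate zero.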
*/ - if (tex_mt->aux_usage != ISL_AUX_USAGE_CCS_D && - tex_mt->aux_usage != ISL_AUX_USAGE_CCS_E) - return false; - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - const struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb && irb->mt->bo == tex_mt->bo && - irb->mt_level >= min_level && - irb->mt_level < min_level + num_levels) { - found = draw_aux_buffer_disabled[i] = true; - } - } - - if (found) { - perf_debug("Disabling CCS because a renderbuffer is also bound %s.\n", - usage); - } - - return found; -} - -/** Implement the ASTC 5x5 sampler workaround - * - * Gfx9 sampling hardware has a bug where an ASTC 5x5 compressed surface - * cannot live in the sampler cache at the same time as an aux compressed - * surface. In order to work around the bug we have to stall rendering with a - * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the - * texture cache whenever one of ASTC 5x5 or aux compressed may be in the - * sampler cache and we're about to render with something which samples from - * the other. - * - * In the case of a single shader which textures from both ASTC 5x5 and - * a texture which is CCS or HiZ compressed, we have to resolve the aux - * compressed texture prior to rendering. This second part is handled in - * brw_predraw_resolve_inputs() below. - * - * We have observed this issue to affect CCS and HiZ sampling but whether or - * not it also affects MCS is unknown. Because MCS has no concept of a - * resolve (and doing one would be stupid expensive), we choose to simply - * ignore the possibility and hope for the best. - */ -static void -gfx9_apply_astc5x5_wa_flush(struct brw_context *brw, - enum gfx9_astc5x5_wa_tex_type curr_mask) -{ - assert(brw->screen->devinfo.ver == 9); - - if (((brw->gfx9_astc5x5_wa_tex_mask & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && - (curr_mask & GFX9_ASTC5X5_WA_TEX_TYPE_AUX)) || - ((brw->gfx9_astc5x5_wa_tex_mask & GFX9_ASTC5X5_WA_TEX_TYPE_AUX) && - (curr_mask & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } - - brw->gfx9_astc5x5_wa_tex_mask = curr_mask; -} - -static enum gfx9_astc5x5_wa_tex_type -gfx9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage) -{ - if (aux_usage != ISL_AUX_USAGE_NONE && - aux_usage != ISL_AUX_USAGE_MCS) - return GFX9_ASTC5X5_WA_TEX_TYPE_AUX; - - if (format == MESA_FORMAT_RGBA_ASTC_5x5 || - format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) - return GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5; - - return 0; -} - -/* Helper for the gfx9 ASTC 5x5 workaround. This version exists for BLORP's - * use-cases where only a single texture is bound. - */ -void -gfx9_apply_single_tex_astc5x5_wa(struct brw_context *brw, - mesa_format format, - enum isl_aux_usage aux_usage) -{ - gfx9_apply_astc5x5_wa_flush(brw, gfx9_astc5x5_wa_bits(format, aux_usage)); -} - -static void -mark_textures_used_for_txf(BITSET_WORD *used_for_txf, - const struct gl_program *prog) -{ - if (!prog) - return; - - unsigned s; - BITSET_FOREACH_SET(s, prog->info.textures_used_by_txf, 32) - BITSET_SET(used_for_txf, prog->SamplerUnits[s]); -} - -/** - * \brief Resolve buffers before drawing. - * - * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each - * enabled depth texture, and flush the render cache for any dirty textures. 
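A CPU-side model of the flush condition in gfx9_apply_astc5x5_wa_flush() above: the CS stall plus texture-cache invalidate is needed exactly when the previously-tracked texture class and the incoming one would mix ASTC 5x5 with aux compression in the sampler cache (a sketch; the mask bits mirror the GFX9_ASTC5X5_WA_TEX_TYPE_* flags):

#include <stdbool.h>
#include <stdio.h>

#define TEX_ASTC5X5 (1u << 0)
#define TEX_AUX     (1u << 1)

/* Flush only on a transition that puts both classes in the cache. */
static bool
needs_flush(unsigned prev_mask, unsigned curr_mask)
{
   return ((prev_mask & TEX_ASTC5X5) && (curr_mask & TEX_AUX)) ||
          ((prev_mask & TEX_AUX) && (curr_mask & TEX_ASTC5X5));
}

int main(void)
{
   printf("%d\n", needs_flush(TEX_ASTC5X5, TEX_AUX)); /* 1: classes meet   */
   printf("%d\n", needs_flush(TEX_AUX, TEX_AUX));     /* 0: same class     */
   return 0;
}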
- */ -void -brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, - bool *draw_aux_buffer_disabled) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_texture_object *tex_obj; - - BITSET_DECLARE(used_for_txf, MAX_COMBINED_TEXTURE_IMAGE_UNITS); - memset(used_for_txf, 0, sizeof(used_for_txf)); - if (rendering) { - mark_textures_used_for_txf(used_for_txf, ctx->VertexProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->TessCtrlProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->TessEvalProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->GeometryProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->FragmentProgram._Current); - } else { - mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current); - } - - int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; - - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits = 0; - if (brw->screen->devinfo.ver == 9) { - /* In order to properly implement the ASTC 5x5 workaround for an - * arbitrary draw or dispatch call, we have to walk the entire list of - * textures looking for ASTC 5x5. If there is any ASTC 5x5 in this draw - * call, all aux compressed textures must be resolved and have aux - * compression disabled while sampling. - */ - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) - continue; - tex_obj = brw_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - - astc5x5_wa_bits |= gfx9_astc5x5_wa_bits(tex_obj->_Format, - tex_obj->mt->aux_usage); - } - gfx9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits); - } - - /* Resolve depth buffer and render cache of each enabled texture. */ - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) - continue; - tex_obj = brw_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i); - enum isl_format view_format = - translate_tex_format(brw, tex_obj->_Format, sampler->Attrib.sRGBDecode); - - unsigned min_level, min_layer, num_levels, num_layers; - if (tex_obj->base.Immutable) { - min_level = tex_obj->base.Attrib.MinLevel; - num_levels = MIN2(tex_obj->base.Attrib.NumLevels, tex_obj->_MaxLevel + 1); - min_layer = tex_obj->base.Attrib.MinLayer; - num_layers = tex_obj->base.Target != GL_TEXTURE_3D ? - tex_obj->base.Attrib.NumLayers : INTEL_REMAINING_LAYERS; - } else { - min_level = tex_obj->base.Attrib.BaseLevel; - num_levels = tex_obj->_MaxLevel - tex_obj->base.Attrib.BaseLevel + 1; - min_layer = 0; - num_layers = INTEL_REMAINING_LAYERS; - } - - if (rendering) { - brw_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled, - tex_obj->mt, min_level, num_levels, - "for sampling"); - } - - brw_miptree_prepare_texture(brw, tex_obj->mt, view_format, - min_level, num_levels, - min_layer, num_layers, - astc5x5_wa_bits); - - /* If any programs are using it with texelFetch, we may need to also do - * a prepare with an sRGB format to ensure texelFetch works "properly". 
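A worked example of the level-range selection above, applying the same formulas to sample numbers (values here are invented for illustration):

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   /* Immutable texture view: MinLevel=2, NumLevels=8, but the effective
    * _MaxLevel is 5, so the view is clamped to 6 levels. */
   unsigned min_level  = 2;                 /* Attrib.MinLevel           */
   unsigned num_levels = MIN2(8u, 5u + 1u); /* MIN2(NumLevels, _MaxLevel+1) */
   printf("immutable: min=%u num=%u\n", min_level, num_levels); /* 2, 6 */

   /* Mutable texture: BaseLevel=1, effective _MaxLevel=4. */
   unsigned base = 1, max = 4;
   printf("mutable:   min=%u num=%u\n", base, max - base + 1);  /* 1, 4 */
   return 0;
}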
- */ - if (BITSET_TEST(used_for_txf, i)) { - enum isl_format txf_format = - translate_tex_format(brw, tex_obj->_Format, GL_DECODE_EXT); - if (txf_format != view_format) { - brw_miptree_prepare_texture(brw, tex_obj->mt, txf_format, - min_level, num_levels, - min_layer, num_layers, - astc5x5_wa_bits); - } - } - - brw_cache_flush_for_read(brw, tex_obj->mt->bo); - - if (tex_obj->base.StencilSampling || - tex_obj->mt->format == MESA_FORMAT_S_UINT8) { - brw_update_r8stencil(brw, tex_obj->mt); - } - - if (brw_miptree_has_etc_shadow(brw, tex_obj->mt) && - tex_obj->mt->shadow_needs_update) { - brw_miptree_update_etc_shadow_levels(brw, tex_obj->mt); - } - } - - /* Resolve color for each active shader image. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - const struct gl_program *prog = ctx->_Shader->CurrentProgram[i]; - - if (unlikely(prog && prog->info.num_images)) { - for (unsigned j = 0; j < prog->info.num_images; j++) { - struct gl_image_unit *u = - &ctx->ImageUnits[prog->sh.ImageUnits[j]]; - tex_obj = brw_texture_object(u->TexObj); - - if (tex_obj && tex_obj->mt) { - if (rendering) { - brw_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled, - tex_obj->mt, 0, ~0, - "as a shader image"); - } - - brw_miptree_prepare_image(brw, tex_obj->mt); - - brw_cache_flush_for_read(brw, tex_obj->mt->bo); - } - } - } - } -} - -static void -brw_predraw_resolve_framebuffer(struct brw_context *brw, - bool *draw_aux_buffer_disabled) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *depth_irb; - - /* Resolve the depth buffer's HiZ buffer. */ - depth_irb = brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (depth_irb && depth_irb->mt) { - brw_miptree_prepare_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, - depth_irb->layer_count); - } - - /* Resolve color buffers for non-coherent framebuffer fetch. */ - if (!ctx->Extensions.EXT_shader_framebuffer_fetch && - ctx->FragmentProgram._Current && - ctx->FragmentProgram._Current->info.outputs_read) { - const struct gl_framebuffer *fb = ctx->DrawBuffer; - - /* This is only used for non-coherent framebuffer fetch, so we don't - * need to worry about CCS_E and can simply pass 'false' below. 
- */ - assert(brw->screen->devinfo.ver < 9); - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - const struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb) { - brw_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format, - irb->mt_level, 1, - irb->mt_layer, irb->layer_count, - brw->gfx9_astc5x5_wa_tex_mask); - } - } - } - - struct gl_framebuffer *fb = ctx->DrawBuffer; - for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb == NULL || irb->mt == NULL) - continue; - - mesa_format mesa_format = - _mesa_get_render_format(ctx, brw_rb_format(irb)); - enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format); - bool blend_enabled = ctx->Color.BlendEnabled & (1 << i); - enum isl_aux_usage aux_usage = - brw_miptree_render_aux_usage(brw, irb->mt, isl_format, - blend_enabled, - draw_aux_buffer_disabled[i]); - if (brw->draw_aux_usage[i] != aux_usage) { - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - brw->draw_aux_usage[i] = aux_usage; - } - - brw_miptree_prepare_render(brw, irb->mt, irb->mt_level, - irb->mt_layer, irb->layer_count, - aux_usage); - - brw_cache_flush_for_render(brw, irb->mt->bo, - isl_format, aux_usage); - } -} - -/** - * \brief Call this after drawing to mark which buffers need resolving - * - * If the depth buffer was written to and if it has an accompanying HiZ - * buffer, then mark that it needs a depth resolve. - * - * If the stencil buffer was written to then mark that it may need to be - * copied to an R8 texture. - * - * If the color buffer is a multisample window system buffer, then - * mark that it needs a downsample. - * - * Also mark any render targets which will be textured as needing a render - * cache flush. - */ -static void -brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - - struct brw_renderbuffer *front_irb = NULL; - struct brw_renderbuffer *back_irb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT); - struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL); - struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; - - if (_mesa_is_front_buffer_drawing(fb)) - front_irb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - - if (front_irb) - front_irb->need_downsample = true; - if (back_irb) - back_irb->need_downsample = true; - if (depth_irb) { - bool depth_written = brw_depth_writes_enabled(brw); - if (depth_att->Layered) { - brw_miptree_finish_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, - depth_irb->layer_count, - depth_written); - } else { - brw_miptree_finish_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, 1, - depth_written); - } - if (depth_written) - brw_depth_cache_add_bo(brw, depth_irb->mt->bo); - } - - if (stencil_irb && brw->stencil_write_enabled) { - struct brw_mipmap_tree *stencil_mt = - stencil_irb->mt->stencil_mt != NULL ? 
- stencil_irb->mt->stencil_mt : stencil_irb->mt; - brw_depth_cache_add_bo(brw, stencil_mt->bo); - brw_miptree_finish_write(brw, stencil_mt, stencil_irb->mt_level, - stencil_irb->mt_layer, - stencil_irb->layer_count, ISL_AUX_USAGE_NONE); - } - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (!irb) - continue; - - mesa_format mesa_format = - _mesa_get_render_format(ctx, brw_rb_format(irb)); - enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format); - enum isl_aux_usage aux_usage = brw->draw_aux_usage[i]; - - brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage); - - brw_miptree_finish_render(brw, irb->mt, irb->mt_level, - irb->mt_layer, irb->layer_count, - aux_usage); - } -} - -static void -brw_renderbuffer_move_temp_back(struct brw_context *brw, - struct brw_renderbuffer *irb) -{ - if (irb->align_wa_mt == NULL) - return; - - brw_cache_flush_for_read(brw, irb->align_wa_mt->bo); - - brw_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0, - irb->mt, - irb->Base.Base.TexImage->Level, irb->mt_layer); - - brw_miptree_reference(&irb->align_wa_mt, NULL); - - /* Finally restore the x,y to correspond to full miptree. */ - brw_renderbuffer_set_draw_offset(irb); - - /* Make sure render surface state gets re-emitted with updated miptree. */ - brw->NewGLState |= _NEW_BUFFERS; -} - -static void -brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = - brw_get_renderbuffer(fb, BUFFER_STENCIL); - - if (depth_irb && depth_irb->align_wa_mt) - brw_renderbuffer_move_temp_back(brw, depth_irb); - - if (stencil_irb && stencil_irb->align_wa_mt) - brw_renderbuffer_move_temp_back(brw, stencil_irb); - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (!irb || irb->align_wa_mt == NULL) - continue; - - brw_renderbuffer_move_temp_back(brw, irb); - } -} - -static void -brw_prepare_drawing(struct gl_context *ctx, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - GLuint min_index, - GLuint max_index) -{ - struct brw_context *brw = brw_context(ctx); - - if (ctx->NewState) - _mesa_update_state(ctx); - - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures(brw); - - /* Find the highest sampler unit used by each shader program. A bit-count - * won't work since ARB programs use the texture unit number as the sampler - * index. - */ - brw->wm.base.sampler_count = - BITSET_LAST_BIT(ctx->FragmentProgram._Current->info.textures_used); - brw->gs.base.sampler_count = ctx->GeometryProgram._Current ? - BITSET_LAST_BIT(ctx->GeometryProgram._Current->info.textures_used) : 0; - brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ? - BITSET_LAST_BIT(ctx->TessEvalProgram._Current->info.textures_used) : 0; - brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ? 
- BITSET_LAST_BIT(ctx->TessCtrlProgram._Current->info.textures_used) : 0; - brw->vs.base.sampler_count = - BITSET_LAST_BIT(ctx->VertexProgram._Current->info.textures_used); - - brw_prepare_render(brw); - - /* This workaround has to happen outside of brw_upload_render_state() - * because it may flush the batchbuffer for a blit, affecting the state - * flags. - */ - brw_workaround_depthstencil_alignment(brw, 0); - - /* Resolves must occur after updating renderbuffers, updating context state, - * and finalizing textures but before setting up any hardware state for - * this draw call. - */ - bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS] = { }; - brw_predraw_resolve_inputs(brw, true, draw_aux_buffer_disabled); - brw_predraw_resolve_framebuffer(brw, draw_aux_buffer_disabled); - - /* Bind all inputs, derive varying and size information: - */ - brw_clear_buffers(brw); - brw_merge_inputs(brw); - - brw->ib.ib = ib; - brw->ctx.NewDriverState |= BRW_NEW_INDICES; - - brw->vb.index_bounds_valid = index_bounds_valid; - brw->vb.min_index = min_index; - brw->vb.max_index = max_index; - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; -} - -static void -brw_finish_drawing(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - - if (brw->always_flush_batch) - brw_batch_flush(brw); - - brw_program_cache_check_size(brw); - brw_postdraw_reconcile_align_wa_slices(brw); - brw_postdraw_set_buffers_need_resolve(brw); - - if (brw->draw.draw_params_count_bo) { - brw_bo_unreference(brw->draw.draw_params_count_bo); - brw->draw.draw_params_count_bo = NULL; - } - - if (brw->draw.draw_params_bo) { - brw_bo_unreference(brw->draw.draw_params_bo); - brw->draw.draw_params_bo = NULL; - } - - if (brw->draw.derived_draw_params_bo) { - brw_bo_unreference(brw->draw.derived_draw_params_bo); - brw->draw.derived_draw_params_bo = NULL; - } -} - -/** - * Implement workarounds for preemption: - * - WaDisableMidObjectPreemptionForGSLineStripAdj - * - WaDisableMidObjectPreemptionForTrifanOrPolygon - * - WaDisableMidObjectPreemptionForLineLoop - * - WA#0798 - */ -static void -gfx9_emit_preempt_wa(struct brw_context *brw, - const struct _mesa_prim *prim, GLuint num_instances) -{ - bool object_preemption = true; - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Only apply these workarounds for gfx9 */ - assert(devinfo->ver == 9); - - /* WaDisableMidObjectPreemptionForGSLineStripAdj - * - * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and - * GS is enabled. - */ - if (brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled) - object_preemption = false; - - /* WaDisableMidObjectPreemptionForTrifanOrPolygon - * - * TriFan miscompare in Execlist Preemption test. Cut index that is on a - * previous context. End the previous, the resume another context with a - * tri-fan or polygon, and the vertex count is corrupted. If we prempt - * again we will cause corruption. - * - * WA: Disable mid-draw preemption when draw-call has a tri-fan. - */ - if (brw->primitive == _3DPRIM_TRIFAN) - object_preemption = false; - - /* WaDisableMidObjectPreemptionForLineLoop - * - * VF Stats Counters Missing a vertex when preemption enabled. - * - * WA: Disable mid-draw preemption when the draw uses a lineloop - * topology. - */ - if (brw->primitive == _3DPRIM_LINELOOP) - object_preemption = false; - - /* WA#0798 - * - * VF is corrupting GAFS data when preempted on an instance boundary and - * replayed with instancing enabled. - * - * WA: Disable preemption when using instanceing. 
- */ - if (num_instances > 1) - object_preemption = false; - - brw_enable_obj_preemption(brw, object_preemption); -} - -/* May fail if out of video memory for texture or vbo upload, or on - * fallback conditions. - */ -static void -brw_draw_single_prim(struct gl_context *ctx, - const struct _mesa_prim *prim, - unsigned prim_id, - bool is_indexed, - GLuint num_instances, GLuint base_instance, - struct brw_transform_feedback_object *xfb_obj, - unsigned stream, - GLsizeiptr indirect_offset) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - bool fail_next; - bool is_indirect = brw->draw.draw_indirect_data != NULL; - - /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have - * atoms that happen on every draw call. - */ - brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL; - - /* Flush the batch if the batch/state buffers are nearly full. We can - * grow them if needed, but this is not free, so we'd like to avoid it. - */ - brw_batch_require_space(brw, 1500); - brw_require_statebuffer_space(brw, 2400); - brw_batch_save_state(brw); - fail_next = brw_batch_saved_state_is_empty(brw); - - if (brw->num_instances != num_instances || - brw->basevertex != prim->basevertex || - brw->baseinstance != base_instance) { - brw->num_instances = num_instances; - brw->basevertex = prim->basevertex; - brw->baseinstance = base_instance; - if (prim_id > 0) { /* For i == 0 we just did this before the loop */ - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - brw_clear_buffers(brw); - } - } - - /* Determine if we need to flag BRW_NEW_VERTICES for updating the - * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we - * always flag if the shader uses one of the values. For direct draws, - * we only flag if the values change. - */ - const int new_firstvertex = - is_indexed ? prim->basevertex : prim->start; - const int new_baseinstance = base_instance; - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - if (prim_id > 0) { - const bool uses_draw_parameters = - vs_prog_data->uses_firstvertex || - vs_prog_data->uses_baseinstance; - - if ((uses_draw_parameters && is_indirect) || - (vs_prog_data->uses_firstvertex && - brw->draw.params.firstvertex != new_firstvertex) || - (vs_prog_data->uses_baseinstance && - brw->draw.params.gl_baseinstance != new_baseinstance)) - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - } - - brw->draw.params.firstvertex = new_firstvertex; - brw->draw.params.gl_baseinstance = new_baseinstance; - brw_bo_unreference(brw->draw.draw_params_bo); - - if (is_indirect) { - /* Point draw_params_bo at the indirect buffer. */ - brw->draw.draw_params_bo = - brw_buffer_object(ctx->DrawIndirectBuffer)->buffer; - brw_bo_reference(brw->draw.draw_params_bo); - brw->draw.draw_params_offset = - indirect_offset + (is_indexed ? 12 : 8); - } else { - /* Set draw_params_bo to NULL so brw_prepare_vertices knows it - * has to upload gl_BaseVertex and such if they're needed. - */ - brw->draw.draw_params_bo = NULL; - brw->draw.draw_params_offset = 0; - } - - /* gl_DrawID always needs its own vertex buffer since it's not part of - * the indirect parameter buffer. Same for is_indexed_draw, which shares - * the buffer with gl_DrawID. If the program uses gl_DrawID, we need to - * flag BRW_NEW_VERTICES. For the first iteration, we don't have valid - * vs_prog_data, but we always flag BRW_NEW_VERTICES before the loop. 
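The four workarounds in gfx9_emit_preempt_wa() above collapse into a single predicate; object-level preemption stays enabled only when none of them fire (a standalone sketch with invented topology enum names):

#include <stdbool.h>

enum topology { LINESTRIP_ADJ, TRIFAN, LINELOOP, TRILIST };

static bool
allow_object_preemption(enum topology prim, bool gs_enabled,
                        unsigned num_instances)
{
   if (prim == LINESTRIP_ADJ && gs_enabled)
      return false;               /* WaDisableMidObjectPreemptionForGSLineStripAdj */
   if (prim == TRIFAN)
      return false;               /* WaDisableMidObjectPreemptionForTrifanOrPolygon */
   if (prim == LINELOOP)
      return false;               /* WaDisableMidObjectPreemptionForLineLoop */
   if (num_instances > 1)
      return false;               /* WA#0798: instancing */
   return true;
}

int main(void)
{
   /* Plain single-instance triangle list: preemption stays on. */
   return allow_object_preemption(TRILIST, false, 1) ? 0 : 1;
}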
- */ - if (prim_id > 0 && vs_prog_data->uses_drawid) - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - - brw->draw.derived_params.gl_drawid = prim->draw_id; - brw->draw.derived_params.is_indexed_draw = is_indexed ? ~0 : 0; - - brw_bo_unreference(brw->draw.derived_draw_params_bo); - brw->draw.derived_draw_params_bo = NULL; - brw->draw.derived_draw_params_offset = 0; - - if (devinfo->ver < 6) - brw_set_prim(brw, prim); - else - gfx6_set_prim(brw, prim); - -retry: - - /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and - * that the state updated in the loop outside of this block is that in - * *_set_prim or brw_batch_flush(), which only impacts - * brw->ctx.NewDriverState. - */ - if (brw->ctx.NewDriverState) { - brw->batch.no_wrap = true; - brw_upload_render_state(brw); - } - - if (devinfo->ver == 9) - gfx9_emit_preempt_wa(brw, prim, num_instances); - - brw_emit_prim(brw, prim, brw->primitive, is_indexed, num_instances, - base_instance, xfb_obj, stream, is_indirect, - indirect_offset); - - brw->batch.no_wrap = false; - - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!fail_next) { - brw_batch_reset_to_saved(brw); - brw_batch_flush(brw); - fail_next = true; - goto retry; - } else { - int ret = brw_batch_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: Single primitive emit exceeded " - "available aperture space\n"); - } - } - - /* Now that we know we haven't run out of aperture space, we can safely - * reset the dirty bits. - */ - if (brw->ctx.NewDriverState) - brw_render_state_finished(brw); - - return; -} - - - -void -brw_draw_prims(struct gl_context *ctx, - const struct _mesa_prim *prims, - unsigned nr_prims, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - bool primitive_restart, - unsigned restart_index, - unsigned min_index, - unsigned max_index, - unsigned num_instances, - unsigned base_instance) -{ - unsigned i; - struct brw_context *brw = brw_context(ctx); - int predicate_state = brw->predicate.state; - - if (!brw_check_conditional_render(brw)) - return; - - /* Handle primitive restart if needed */ - if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, num_instances, - base_instance, primitive_restart, - restart_index)) { - /* The draw was handled, so we can exit now */ - return; - } - - /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it - * won't support all the extensions we support. - */ - if (ctx->RenderMode != GL_RENDER) { - perf_debug("%s render mode not supported in hardware\n", - _mesa_enum_to_string(ctx->RenderMode)); - _swsetup_Wakeup(ctx); - _tnl_wakeup(ctx); - _tnl_draw(ctx, prims, nr_prims, ib, index_bounds_valid, - primitive_restart, restart_index, min_index, - max_index, num_instances, base_instance); - return; - } - - /* If we're going to have to upload any of the user's vertex arrays, then - * get the minimum and maximum of their index buffer so we know what range - * to upload. - */ - if (!index_bounds_valid && _mesa_draw_user_array_bits(ctx) != 0) { - perf_debug("Scanning index buffer to compute index buffer bounds. " - "Use glDrawRangeElements() to avoid this.\n"); - vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims, - primitive_restart, restart_index); - index_bounds_valid = true; - } - - brw_prepare_drawing(ctx, ib, index_bounds_valid, min_index, max_index); - /* Try drawing with the hardware, but don't do anything else if we can't - * manage it. swrast doesn't support our featureset, so we can't fall back - * to it. 
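A CPU-side model of the MI_PREDICATE programming in the loop below: SRC0 holds the GPU-read draw count, SRC1 the current draw id, and the LOADINV/SET followed by LOAD/XOR sequence makes draw i execute exactly when i < count (a sketch that ignores the BRW_PREDICATE_STATE_USE_BIT special case):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool predicate; /* models the GPU's predicate bit */

static void
run_draw(uint32_t draw_id, uint32_t count)
{
   bool srcs_equal = (count == draw_id); /* MI_PREDICATE_COMPAREOP_SRCS_EQUAL */
   if (draw_id == 0)
      predicate = !srcs_equal;           /* LOADINV + COMBINEOP_SET           */
   else
      predicate ^= srcs_equal;           /* LOAD + COMBINEOP_XOR              */
}

int main(void)
{
   const uint32_t count = 3;             /* draw count read from the buffer */
   for (uint32_t i = 0; i < 8; i++) {    /* 8 draws submitted by the app    */
      run_draw(i, count);
      assert(predicate == (i < count));  /* only draws 0..2 execute         */
   }
   return 0;
}

The XOR is what makes the chain self-terminating: the predicate flips to false on the draw whose id equals the count, and every later comparison is false, so it stays false.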
- */ - - for (i = 0; i < nr_prims; i++) { - /* Implementation of ARB_indirect_parameters via predicates */ - if (brw->draw.draw_params_count_bo) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - - /* Upload the current draw count from the draw parameters buffer to - * MI_PREDICATE_SRC0. - */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, - brw->draw.draw_params_count_bo, - brw->draw.draw_params_count_offset); - /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ - brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0); - /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */ - brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id); - - BEGIN_BATCH(1); - if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) { - OUT_BATCH(GFX7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - } else { - OUT_BATCH(GFX7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - } - ADVANCE_BATCH(); - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; - } - - brw_draw_single_prim(ctx, &prims[i], i, ib != NULL, num_instances, - base_instance, NULL, 0, - brw->draw.draw_indirect_offset + - brw->draw.draw_indirect_stride * i); - } - - brw_finish_drawing(ctx); - brw->predicate.state = predicate_state; -} - -static void -brw_draw_transform_feedback(struct gl_context *ctx, GLenum mode, - unsigned num_instances, unsigned stream, - struct gl_transform_feedback_object *gl_xfb_obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *xfb_obj = - (struct brw_transform_feedback_object *) gl_xfb_obj; - - if (!brw_check_conditional_render(brw)) - return; - - /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it - * won't support all the extensions we support. - */ - if (ctx->RenderMode != GL_RENDER) { - perf_debug("%s render mode not supported in hardware\n", - _mesa_enum_to_string(ctx->RenderMode)); - /* swrast doesn't support DrawTransformFeedback. Nothing to do. */ - return; - } - - brw_prepare_drawing(ctx, NULL, false, 0, ~0); - - struct _mesa_prim prim; - memset(&prim, 0, sizeof(prim)); - prim.begin = 1; - prim.end = 1; - prim.mode = mode; - - /* Try drawing with the hardware, but don't do anything else if we can't - * manage it. swrast doesn't support our featureset, so we can't fall back - * to it. - */ - brw_draw_single_prim(ctx, &prim, 0, false, num_instances, 0, xfb_obj, - stream, 0); - brw_finish_drawing(ctx); -} - -void -brw_draw_indirect_prims(struct gl_context *ctx, - GLuint mode, - struct gl_buffer_object *indirect_data, - GLsizeiptr indirect_offset, - unsigned draw_count, - unsigned stride, - struct gl_buffer_object *indirect_params, - GLsizeiptr indirect_params_offset, - const struct _mesa_index_buffer *ib, - bool primitive_restart, - unsigned restart_index) -{ - struct brw_context *brw = brw_context(ctx); - struct _mesa_prim *prim; - GLsizei i; - - prim = calloc(draw_count, sizeof(*prim)); - if (prim == NULL) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s", - (draw_count > 1) ? "Multi" : "", - ib ? "Elements" : "Arrays", - indirect_params ? 
"CountARB" : ""); - return; - } - - brw->draw.draw_indirect_stride = stride; - brw->draw.draw_indirect_offset = indirect_offset; - - prim[0].begin = 1; - prim[draw_count - 1].end = 1; - for (i = 0; i < draw_count; ++i) { - prim[i].mode = mode; - prim[i].draw_id = i; - } - - if (indirect_params) { - brw->draw.draw_params_count_bo = - brw_buffer_object(indirect_params)->buffer; - brw_bo_reference(brw->draw.draw_params_count_bo); - brw->draw.draw_params_count_offset = indirect_params_offset; - } - - brw->draw.draw_indirect_data = indirect_data; - - brw_draw_prims(ctx, prim, draw_count, ib, false, primitive_restart, - restart_index, 0, ~0, 0, 0); - - brw->draw.draw_indirect_data = NULL; - free(prim); -} - -void -brw_init_draw_functions(struct dd_function_table *functions) -{ - /* Register our drawing function: - */ - functions->Draw = brw_draw_prims; - functions->DrawTransformFeedback = brw_draw_transform_feedback; - functions->DrawIndirect = brw_draw_indirect_prims; -} - -void -brw_draw_init(struct brw_context *brw) -{ - for (int i = 0; i < VERT_ATTRIB_MAX; i++) - brw->vb.inputs[i].buffer = -1; - brw->vb.nr_buffers = 0; - brw->vb.nr_enabled = 0; -} - -void -brw_draw_destroy(struct brw_context *brw) -{ - unsigned i; - - for (i = 0; i < brw->vb.nr_buffers; i++) { - brw_bo_unreference(brw->vb.buffers[i].bo); - brw->vb.buffers[i].bo = NULL; - } - brw->vb.nr_buffers = 0; - - for (i = 0; i < brw->vb.nr_enabled; i++) { - brw->vb.enabled[i]->buffer = -1; - } - brw->vb.nr_enabled = 0; - - brw_bo_unreference(brw->ib.bo); - brw->ib.bo = NULL; -} diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h deleted file mode 100644 index d9ab2f3..0000000 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2005 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_DRAW_H -#define BRW_DRAW_H - -#include "main/mtypes.h" -#include "brw_bufmgr.h" - -struct brw_context; - -uint32_t * -brw_emit_vertex_buffer_state(struct brw_context *brw, - unsigned buffer_nr, - struct brw_bo *bo, - unsigned start_offset, - unsigned end_offset, - unsigned stride, - unsigned step_rate, - uint32_t *__map); - -#define EMIT_VERTEX_BUFFER_STATE(...) 
__map = \ - brw_emit_vertex_buffer_state(__VA_ARGS__, __map) - -void brw_draw_prims(struct gl_context *ctx, - const struct _mesa_prim *prims, - unsigned nr_prims, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - bool primitive_restart, - unsigned restart_index, - unsigned min_index, - unsigned max_index, - unsigned num_instances, - unsigned base_instance); - -void brw_init_draw_functions(struct dd_function_table *functions); -void brw_draw_init( struct brw_context *brw ); -void brw_draw_destroy( struct brw_context *brw ); - -void brw_prepare_shader_draw_parameters(struct brw_context *); - -/* brw_primitive_restart.c */ -GLboolean -brw_handle_primitive_restart(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, GLuint base_instance, - bool primitive_restart, - unsigned restart_index); - -void -brw_draw_indirect_prims(struct gl_context *ctx, - GLuint mode, - struct gl_buffer_object *indirect_data, - GLsizeiptr indirect_offset, - unsigned draw_count, - unsigned stride, - struct gl_buffer_object *indirect_params, - GLsizeiptr indirect_params_offset, - const struct _mesa_index_buffer *ib, - bool primitive_restart, - unsigned restart_index); -#endif diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c deleted file mode 100644 index 656159e..0000000 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ /dev/null @@ -1,801 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/arrayobj.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/enums.h" -#include "main/macros.h" -#include "main/glformats.h" -#include "nir.h" - -#include "brw_draw.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "brw_batch.h" -#include "brw_buffer_objects.h" - -static const GLuint double_types_float[5] = { - 0, - ISL_FORMAT_R64_FLOAT, - ISL_FORMAT_R64G64_FLOAT, - ISL_FORMAT_R64G64B64_FLOAT, - ISL_FORMAT_R64G64B64A64_FLOAT -}; - -static const GLuint double_types_passthru[5] = { - 0, - ISL_FORMAT_R64_PASSTHRU, - ISL_FORMAT_R64G64_PASSTHRU, - ISL_FORMAT_R64G64B64_PASSTHRU, - ISL_FORMAT_R64G64B64A64_PASSTHRU -}; - -static const GLuint float_types[5] = { - 0, - ISL_FORMAT_R32_FLOAT, - ISL_FORMAT_R32G32_FLOAT, - ISL_FORMAT_R32G32B32_FLOAT, - ISL_FORMAT_R32G32B32A32_FLOAT -}; - -static const GLuint half_float_types[5] = { - 0, - ISL_FORMAT_R16_FLOAT, - ISL_FORMAT_R16G16_FLOAT, - ISL_FORMAT_R16G16B16_FLOAT, - ISL_FORMAT_R16G16B16A16_FLOAT -}; - -static const GLuint fixed_point_types[5] = { - 0, - ISL_FORMAT_R32_SFIXED, - ISL_FORMAT_R32G32_SFIXED, - ISL_FORMAT_R32G32B32_SFIXED, - ISL_FORMAT_R32G32B32A32_SFIXED, -}; - -static const GLuint uint_types_direct[5] = { - 0, - ISL_FORMAT_R32_UINT, - ISL_FORMAT_R32G32_UINT, - ISL_FORMAT_R32G32B32_UINT, - ISL_FORMAT_R32G32B32A32_UINT -}; - -static const GLuint uint_types_norm[5] = { - 0, - ISL_FORMAT_R32_UNORM, - ISL_FORMAT_R32G32_UNORM, - ISL_FORMAT_R32G32B32_UNORM, - ISL_FORMAT_R32G32B32A32_UNORM -}; - -static const GLuint uint_types_scale[5] = { - 0, - ISL_FORMAT_R32_USCALED, - ISL_FORMAT_R32G32_USCALED, - ISL_FORMAT_R32G32B32_USCALED, - ISL_FORMAT_R32G32B32A32_USCALED -}; - -static const GLuint int_types_direct[5] = { - 0, - ISL_FORMAT_R32_SINT, - ISL_FORMAT_R32G32_SINT, - ISL_FORMAT_R32G32B32_SINT, - ISL_FORMAT_R32G32B32A32_SINT -}; - -static const GLuint int_types_norm[5] = { - 0, - ISL_FORMAT_R32_SNORM, - ISL_FORMAT_R32G32_SNORM, - ISL_FORMAT_R32G32B32_SNORM, - ISL_FORMAT_R32G32B32A32_SNORM -}; - -static const GLuint int_types_scale[5] = { - 0, - ISL_FORMAT_R32_SSCALED, - ISL_FORMAT_R32G32_SSCALED, - ISL_FORMAT_R32G32B32_SSCALED, - ISL_FORMAT_R32G32B32A32_SSCALED -}; - -static const GLuint ushort_types_direct[5] = { - 0, - ISL_FORMAT_R16_UINT, - ISL_FORMAT_R16G16_UINT, - ISL_FORMAT_R16G16B16_UINT, - ISL_FORMAT_R16G16B16A16_UINT -}; - -static const GLuint ushort_types_norm[5] = { - 0, - ISL_FORMAT_R16_UNORM, - ISL_FORMAT_R16G16_UNORM, - ISL_FORMAT_R16G16B16_UNORM, - ISL_FORMAT_R16G16B16A16_UNORM -}; - -static const GLuint ushort_types_scale[5] = { - 0, - ISL_FORMAT_R16_USCALED, - ISL_FORMAT_R16G16_USCALED, - ISL_FORMAT_R16G16B16_USCALED, - ISL_FORMAT_R16G16B16A16_USCALED -}; - -static const GLuint short_types_direct[5] = { - 0, - ISL_FORMAT_R16_SINT, - ISL_FORMAT_R16G16_SINT, - ISL_FORMAT_R16G16B16_SINT, - ISL_FORMAT_R16G16B16A16_SINT -}; - -static const GLuint short_types_norm[5] = { - 0, - ISL_FORMAT_R16_SNORM, - ISL_FORMAT_R16G16_SNORM, - ISL_FORMAT_R16G16B16_SNORM, - ISL_FORMAT_R16G16B16A16_SNORM -}; - -static const GLuint short_types_scale[5] = { - 0, - ISL_FORMAT_R16_SSCALED, - ISL_FORMAT_R16G16_SSCALED, - ISL_FORMAT_R16G16B16_SSCALED, - ISL_FORMAT_R16G16B16A16_SSCALED -}; - -static const GLuint ubyte_types_direct[5] = { - 0, - ISL_FORMAT_R8_UINT, - ISL_FORMAT_R8G8_UINT, - ISL_FORMAT_R8G8B8_UINT, - ISL_FORMAT_R8G8B8A8_UINT -}; - -static const GLuint ubyte_types_norm[5] = { - 0, - ISL_FORMAT_R8_UNORM, - ISL_FORMAT_R8G8_UNORM, - ISL_FORMAT_R8G8B8_UNORM, - 
ISL_FORMAT_R8G8B8A8_UNORM -}; - -static const GLuint ubyte_types_scale[5] = { - 0, - ISL_FORMAT_R8_USCALED, - ISL_FORMAT_R8G8_USCALED, - ISL_FORMAT_R8G8B8_USCALED, - ISL_FORMAT_R8G8B8A8_USCALED -}; - -static const GLuint byte_types_direct[5] = { - 0, - ISL_FORMAT_R8_SINT, - ISL_FORMAT_R8G8_SINT, - ISL_FORMAT_R8G8B8_SINT, - ISL_FORMAT_R8G8B8A8_SINT -}; - -static const GLuint byte_types_norm[5] = { - 0, - ISL_FORMAT_R8_SNORM, - ISL_FORMAT_R8G8_SNORM, - ISL_FORMAT_R8G8B8_SNORM, - ISL_FORMAT_R8G8B8A8_SNORM -}; - -static const GLuint byte_types_scale[5] = { - 0, - ISL_FORMAT_R8_SSCALED, - ISL_FORMAT_R8G8_SSCALED, - ISL_FORMAT_R8G8B8_SSCALED, - ISL_FORMAT_R8G8B8A8_SSCALED -}; - -static GLuint -double_types(int size, GLboolean doubles) -{ - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): - * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, - * 64-bit components are stored in the URB without any conversion." - * Also included on BDW PRM, Volume 7, page 470, table "Source Element - * Formats Supported in VF Unit" - * - * Previous PRMs don't include those references, so for gfx7 we can't use - * PASSTHRU formats directly. But in any case, we prefer to return passthru - * even in that case, because that reflects what we want to achieve, even - * if we would need to workaround on gen < 8. - */ - return (doubles - ? double_types_passthru[size] - : double_types_float[size]); -} - -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. - */ -unsigned -brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_vertex_format *glformat) -{ - int size = glformat->Size; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool is_ivybridge_or_older = - devinfo->verx10 < 70 || devinfo->platform == INTEL_PLATFORM_IVB; - - if (INTEL_DEBUG(DEBUG_VERTS)) - fprintf(stderr, "type %s size %d normalized %d\n", - _mesa_enum_to_string(glformat->Type), - glformat->Size, glformat->Normalized); - - if (glformat->Integer) { - assert(glformat->Format == GL_RGBA); /* sanity check */ - switch (glformat->Type) { - case GL_INT: return int_types_direct[size]; - case GL_SHORT: - if (is_ivybridge_or_older && size == 3) - return short_types_direct[4]; - else - return short_types_direct[size]; - case GL_BYTE: - if (is_ivybridge_or_older && size == 3) - return byte_types_direct[4]; - else - return byte_types_direct[size]; - case GL_UNSIGNED_INT: return uint_types_direct[size]; - case GL_UNSIGNED_SHORT: - if (is_ivybridge_or_older && size == 3) - return ushort_types_direct[4]; - else - return ushort_types_direct[size]; - case GL_UNSIGNED_BYTE: - if (is_ivybridge_or_older && size == 3) - return ubyte_types_direct[4]; - else - return ubyte_types_direct[size]; - default: unreachable("not reached"); - } - } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { - return ISL_FORMAT_R11G11B10_FLOAT; - } else if (glformat->Normalized) { - switch (glformat->Type) { - case GL_DOUBLE: return double_types(size, glformat->Doubles); - case GL_FLOAT: return float_types[size]; - case GL_HALF_FLOAT: - case GL_HALF_FLOAT_OES: - if (devinfo->ver < 6 && size == 3) - return half_float_types[4]; - else - return half_float_types[size]; - case GL_INT: return int_types_norm[size]; - case GL_SHORT: return short_types_norm[size]; - case GL_BYTE: return byte_types_norm[size]; - case GL_UNSIGNED_INT: return uint_types_norm[size]; - case GL_UNSIGNED_SHORT: 
return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: - if (glformat->Format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - assert(size == 4); - return ISL_FORMAT_B8G8R8A8_UNORM; - } - else { - return ubyte_types_norm[size]; - } - case GL_FIXED: - if (devinfo->verx10 >= 75) - return fixed_point_types[size]; - - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - return int_types_scale[size]; - /* See GL_ARB_vertex_type_2_10_10_10_rev. - * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd - * like to use here, so upload everything as UINT and fix - * it in the shader - */ - case GL_INT_2_10_10_10_REV: - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_SNORM - : ISL_FORMAT_R10G10B10A2_SNORM; - } - return ISL_FORMAT_R10G10B10A2_UINT; - case GL_UNSIGNED_INT_2_10_10_10_REV: - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_UNORM - : ISL_FORMAT_R10G10B10A2_UNORM; - } - return ISL_FORMAT_R10G10B10A2_UINT; - default: unreachable("not reached"); - } - } - else { - /* See GL_ARB_vertex_type_2_10_10_10_rev. - * W/A: the hardware doesn't really support the formats we'd - * like to use here, so upload everything as UINT and fix - * it in the shader - */ - if (glformat->Type == GL_INT_2_10_10_10_REV) { - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_SSCALED - : ISL_FORMAT_R10G10B10A2_SSCALED; - } - return ISL_FORMAT_R10G10B10A2_UINT; - } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_USCALED - : ISL_FORMAT_R10G10B10A2_USCALED; - } - return ISL_FORMAT_R10G10B10A2_UINT; - } - assert(glformat->Format == GL_RGBA); /* sanity check */ - switch (glformat->Type) { - case GL_DOUBLE: return double_types(size, glformat->Doubles); - case GL_FLOAT: return float_types[size]; - case GL_HALF_FLOAT: - case GL_HALF_FLOAT_OES: - if (devinfo->ver < 6 && size == 3) - return half_float_types[4]; - else - return half_float_types[size]; - case GL_INT: return int_types_scale[size]; - case GL_SHORT: return short_types_scale[size]; - case GL_BYTE: return byte_types_scale[size]; - case GL_UNSIGNED_INT: return uint_types_scale[size]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - case GL_FIXED: - if (devinfo->verx10 >= 75) - return fixed_point_types[size]; - - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - return int_types_scale[size]; - default: unreachable("not reached"); - } - } -} - -static void -copy_array_to_vbo_array(struct brw_context *brw, - const uint8_t *const ptr, const int src_stride, - int min, int max, - struct brw_vertex_buffer *buffer, - GLuint dst_stride) -{ - const unsigned char *src = ptr + min * src_stride; - int count = max - min + 1; - GLuint size = count * dst_stride; - uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride, - &buffer->bo, &buffer->offset); - - /* The GL 4.5 spec says: - * "If any enabled array’s buffer binding is zero when DrawArrays or - * one of the other drawing commands defined in section 10.4 is called, - * the result is undefined." 
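A worked example of the pre-Haswell GL_FIXED fallback described above: the raw 16.16 fixed-point bits reach the VS as plain integers, and the shader recovers the real value by multiplying with 1/65536 (both results below are exact because the divisor is a power of two):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   int32_t fixed_one = 1 << 16;          /*  1.0 in 16.16 fixed point */
   int32_t fixed_neg = -(3 << 16) / 2;   /* -1.5 in 16.16 fixed point */
   float   vs_scale  = 1.0f / 65536.0f;  /* the scale applied in the VS */

   assert(fixed_one * vs_scale == 1.0f);
   assert(fixed_neg * vs_scale == -1.5f);
   return 0;
}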
- * - * In this case, let's the dst with undefined values - */ - if (ptr != NULL) { - if (dst_stride == src_stride) { - memcpy(dst, src, size); - } else { - while (count--) { - memcpy(dst, src, dst_stride); - src += src_stride; - dst += dst_stride; - } - } - } - buffer->stride = dst_stride; - buffer->size = size; -} - -void -brw_prepare_vertices(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - const uint64_t vs_inputs64 = - nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read, - vp->DualSlotInputs); - assert((vs_inputs64 & ~(uint64_t)VERT_BIT_ALL) == 0); - unsigned vs_inputs = (unsigned)vs_inputs64; - unsigned int min_index = brw->vb.min_index + brw->basevertex; - unsigned int max_index = brw->vb.max_index + brw->basevertex; - int delta, j; - - /* _NEW_POLYGON - * - * On gfx6+, edge flags don't end up in the VUE (either in or out of the - * VS). Instead, they're uploaded as the last vertex element, and the data - * is passed sideband through the fixed function units. So, we need to - * prepare the vertex buffer for it, but it's not present in inputs_read. - */ - if (devinfo->ver >= 6 && (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL)) { - vs_inputs |= VERT_BIT_EDGEFLAG; - } - - if (0) - fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); - - /* Accumulate the list of enabled arrays. */ - brw->vb.nr_enabled = 0; - - unsigned mask = vs_inputs; - while (mask) { - const gl_vert_attrib attr = u_bit_scan(&mask); - struct brw_vertex_element *input = &brw->vb.inputs[attr]; - brw->vb.enabled[brw->vb.nr_enabled++] = input; - } - assert(brw->vb.nr_enabled <= VERT_ATTRIB_MAX); - - if (brw->vb.nr_enabled == 0) - return; - - if (brw->vb.nr_buffers) - return; - - j = 0; - const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; - - unsigned vbomask = vs_inputs & _mesa_draw_vbo_array_bits(ctx); - while (vbomask) { - const struct gl_vertex_buffer_binding *const glbinding = - _mesa_draw_buffer_binding(vao, ffs(vbomask) - 1); - const GLsizei stride = glbinding->Stride; - - assert(glbinding->BufferObj); - - /* Accumulate the range of a single vertex, start with inverted range */ - uint32_t vertex_range_start = ~(uint32_t)0; - uint32_t vertex_range_end = 0; - - const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding); - unsigned attrmask = vbomask & boundmask; - /* Mark the those attributes as processed */ - vbomask ^= attrmask; - /* We can assume that we have an array for the binding */ - assert(attrmask); - /* Walk attributes belonging to the binding */ - while (attrmask) { - const gl_vert_attrib attr = u_bit_scan(&attrmask); - const struct gl_array_attributes *const glattrib = - _mesa_draw_array_attrib(vao, attr); - const uint32_t rel_offset = - _mesa_draw_attributes_relative_offset(glattrib); - const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize; - - vertex_range_start = MIN2(vertex_range_start, rel_offset); - vertex_range_end = MAX2(vertex_range_end, rel_end); - - struct brw_vertex_element *input = &brw->vb.inputs[attr]; - input->glformat = &glattrib->Format; - input->buffer = j; - input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0; - input->offset = rel_offset; - } - assert(vertex_range_start <= 
vertex_range_end); - - struct brw_buffer_object *intel_buffer = - brw_buffer_object(glbinding->BufferObj); - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - - const uint32_t offset = _mesa_draw_binding_offset(glbinding); - - /* If nothing else is known take the buffer size and offset as a bound */ - uint32_t start = vertex_range_start; - uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start; - /* Check if we can get a more narrow range */ - if (glbinding->InstanceDivisor) { - if (brw->num_instances) { - const uint32_t vertex_size = vertex_range_end - vertex_range_start; - start = vertex_range_start + stride * brw->baseinstance; - range = (stride * ((brw->num_instances - 1) / - glbinding->InstanceDivisor) + - vertex_size); - } - } else { - if (brw->vb.index_bounds_valid) { - const uint32_t vertex_size = vertex_range_end - vertex_range_start; - start = vertex_range_start + stride * min_index; - range = (stride * (max_index - min_index) + - vertex_size); - - /** - * Unreal Engine 4 has a bug in usage of glDrawRangeElements, - * causing it to be called with a number of vertices in place - * of "end" parameter (which specifies the maximum array index - * contained in indices). - * - * Since there is unknown amount of games affected and we - * could not identify that a game is built with UE4 - we are - * forced to make a blanket workaround, disregarding max_index - * in range calculations. Fortunately all such calls look like: - * glDrawRangeElements(GL_TRIANGLES, 0, 3, 3, ...); - * So we are able to narrow down this workaround. - * - * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2917 - */ - if (unlikely(max_index == 3 && min_index == 0 && - brw->draw.derived_params.is_indexed_draw)) { - range = intel_buffer->Base.Size - offset - start; - } - } - } - - buffer->offset = offset; - buffer->size = start + range; - buffer->stride = stride; - buffer->step_rate = glbinding->InstanceDivisor; - - buffer->bo = brw_bufferobj_buffer(brw, intel_buffer, offset + start, - range, false); - brw_bo_reference(buffer->bo); - - j++; - } - - /* If we need to upload all the arrays, then we can trim those arrays to - * only the used elements [min_index, max_index] so long as we adjust all - * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. 
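A worked example of the trim-and-bias step described in the comment above and applied in the code that follows: when every enabled array is a user-space array, only elements [min_index, max_index] are uploaded, and start_vertex_bias rebases vertex fetch back to element 0 (sample numbers only):

#include <stdio.h>

int main(void)
{
   unsigned min_index = 100, max_index = 163; /* from the index buffer */
   int start_vertex_bias = 0;
   int delta = (int)min_index;

   int all_arrays_are_user_arrays = 1;        /* assumed for the example */
   if (all_arrays_are_user_arrays) {
      start_vertex_bias = -delta;             /* rebase fetch to element 0      */
      delta = 0;                              /* arrays uploaded from min_index */
   }

   /* 64 vertices are uploaded instead of 164: */
   printf("upload %u vertices, bias %d\n",
          max_index - min_index + 1, start_vertex_bias);
   return 0;
}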
-   /* If we need to upload all the arrays, then we can trim those arrays to
-    * only the used elements [min_index, max_index] so long as we adjust all
-    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
-    */
-   brw->vb.start_vertex_bias = 0;
-   delta = min_index;
-   if ((vs_inputs & _mesa_draw_vbo_array_bits(ctx)) == 0) {
-      brw->vb.start_vertex_bias = -delta;
-      delta = 0;
-   }
-
-   unsigned usermask = vs_inputs & _mesa_draw_user_array_bits(ctx);
-   while (usermask) {
-      const struct gl_vertex_buffer_binding *const glbinding =
-         _mesa_draw_buffer_binding(vao, ffs(usermask) - 1);
-      const GLsizei stride = glbinding->Stride;
-
-      assert(!glbinding->BufferObj);
-      assert(brw->vb.index_bounds_valid);
-
-      /* Accumulate the range of a single vertex, start with inverted range */
-      uint32_t vertex_range_start = ~(uint32_t)0;
-      uint32_t vertex_range_end = 0;
-
-      const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
-      unsigned attrmask = usermask & boundmask;
-      /* Mark those attributes as processed */
-      usermask ^= attrmask;
-      /* We can assume that we have an array for the binding */
-      assert(attrmask);
-      /* Walk attributes belonging to the binding */
-      while (attrmask) {
-         const gl_vert_attrib attr = u_bit_scan(&attrmask);
-         const struct gl_array_attributes *const glattrib =
-            _mesa_draw_array_attrib(vao, attr);
-         const uint32_t rel_offset =
-            _mesa_draw_attributes_relative_offset(glattrib);
-         const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
-
-         vertex_range_start = MIN2(vertex_range_start, rel_offset);
-         vertex_range_end = MAX2(vertex_range_end, rel_end);
-
-         struct brw_vertex_element *input = &brw->vb.inputs[attr];
-         input->glformat = &glattrib->Format;
-         input->buffer = j;
-         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
-         input->offset = rel_offset;
-      }
-      assert(vertex_range_start <= vertex_range_end);
-
-      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-
-      const uint8_t *ptr = (const uint8_t*)_mesa_draw_binding_offset(glbinding);
-      ptr += vertex_range_start;
-      const uint32_t vertex_size = vertex_range_end - vertex_range_start;
-      if (glbinding->Stride == 0) {
-         /* If the source stride is zero, we just want to upload the current
-          * attribute once and set the buffer's stride to 0. There's no need
-          * to replicate it out.
-          */
-         copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size);
-      } else if (glbinding->InstanceDivisor == 0) {
-         copy_array_to_vbo_array(brw, ptr, stride, min_index,
-                                 max_index, buffer, vertex_size);
-      } else {
-         /* This is an instanced attribute, since its InstanceDivisor
-          * is not zero. Therefore, its data will be stepped after the
-          * instanced draw has been run InstanceDivisor times.
-          */
-         uint32_t instanced_attr_max_index =
-            (brw->num_instances - 1) / glbinding->InstanceDivisor;
-         copy_array_to_vbo_array(brw, ptr, stride, 0,
-                                 instanced_attr_max_index, buffer, vertex_size);
-      }
-      buffer->offset -= delta * buffer->stride + vertex_range_start;
-      buffer->size += delta * buffer->stride + vertex_range_start;
-      buffer->step_rate = glbinding->InstanceDivisor;
-
-      j++;
-   }
-
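[Annotation: in the instanced branch above, the attribute only advances once every InstanceDivisor instances, so the upload can stop at element (num_instances - 1) / divisor instead of replicating data per instance. A small sketch of that step count; the helper name is hypothetical, for illustration only.]

   #include <assert.h>
   #include <stdint.h>

   static uint32_t
   instanced_attr_value_count(uint32_t num_instances, uint32_t divisor)
   {
      assert(divisor > 0); /* divisor == 0 would mean per-vertex stepping */
      /* Instance i reads element i / divisor, so the last instance reads
       * element (num_instances - 1) / divisor; the count is that index
       * plus one.
       */
      return (num_instances - 1) / divisor + 1;
   }

For example, 10 instances with a divisor of 4 read elements 0, 1 and 2, so only three values need to be uploaded.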
-   /* Upload the current values */
-   unsigned curmask = vs_inputs & _mesa_draw_current_bits(ctx);
-   if (curmask) {
-      /* For each attribute, upload the maximum possible size. */
-      uint8_t data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
-      uint8_t *cursor = data;
-
-      do {
-         const gl_vert_attrib attr = u_bit_scan(&curmask);
-         const struct gl_array_attributes *const glattrib =
-            _mesa_draw_current_attrib(ctx, attr);
-         const unsigned size = glattrib->Format._ElementSize;
-         const unsigned alignment = align(size, sizeof(GLdouble));
-         memcpy(cursor, glattrib->Ptr, size);
-         if (alignment != size)
-            memset(cursor + size, 0, alignment - size);
-
-         struct brw_vertex_element *input = &brw->vb.inputs[attr];
-         input->glformat = &glattrib->Format;
-         input->buffer = j;
-         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
-         input->offset = cursor - data;
-
-         cursor += alignment;
-      } while (curmask);
-
-      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-      const unsigned size = cursor - data;
-      brw_upload_data(&brw->upload, data, size, size,
-                      &buffer->bo, &buffer->offset);
-      buffer->stride = 0;
-      buffer->size = size;
-      buffer->step_rate = 0;
-
-      j++;
-   }
-   brw->vb.nr_buffers = j;
-}
-
-void
-brw_prepare_shader_draw_parameters(struct brw_context *brw)
-{
-   const struct brw_vs_prog_data *vs_prog_data =
-      brw_vs_prog_data(brw->vs.base.prog_data);
-
-   /* For non-indirect draws, upload the shader draw parameters */
-   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
-       brw->draw.draw_params_bo == NULL) {
-      brw_upload_data(&brw->upload,
-                      &brw->draw.params, sizeof(brw->draw.params), 4,
-                      &brw->draw.draw_params_bo,
-                      &brw->draw.draw_params_offset);
-   }
-
-   if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
-      brw_upload_data(&brw->upload,
-                      &brw->draw.derived_params, sizeof(brw->draw.derived_params), 4,
-                      &brw->draw.derived_draw_params_bo,
-                      &brw->draw.derived_draw_params_offset);
-   }
-}
-
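[Annotation: brw_upload_indices() below converts the byte offset into the index buffer into 3DPRIMITIVE's start_vertex_offset, expressed in whole indices. A sketch of that conversion; the helper name is hypothetical, and index_size_shift is log2 of the index type size, as in _mesa_index_buffer.]

   #include <stdint.h>

   static uint32_t
   index_start_vertex(uint32_t byte_offset, unsigned index_size_shift)
   {
      /* shift 0: GL_UNSIGNED_BYTE, 1: GL_UNSIGNED_SHORT, 2: GL_UNSIGNED_INT */
      return byte_offset >> index_size_shift;
   }

This mirrors the offset / ib_type_size division in the function below, since ib_type_size is 1 << index_size_shift.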
-static void
-brw_upload_indices(struct brw_context *brw)
-{
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   GLuint ib_size;
-   struct brw_bo *old_bo = brw->ib.bo;
-   struct gl_buffer_object *bufferobj;
-   GLuint offset;
-   GLuint ib_type_size;
-
-   if (index_buffer == NULL)
-      return;
-
-   ib_type_size = 1 << index_buffer->index_size_shift;
-   ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
-                                   index_buffer->obj->Size;
-   bufferobj = index_buffer->obj;
-
-   /* Turn into a proper VBO:
-    */
-   if (!bufferobj) {
-      /* Get new bufferobj, offset:
-       */
-      brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
-                      &brw->ib.bo, &offset);
-      brw->ib.size = brw->ib.bo->size;
-   } else {
-      offset = (GLuint) (unsigned long) index_buffer->ptr;
-
-      struct brw_bo *bo =
-         brw_bufferobj_buffer(brw, brw_buffer_object(bufferobj),
-                              offset, ib_size, false);
-      if (bo != brw->ib.bo) {
-         brw_bo_unreference(brw->ib.bo);
-         brw->ib.bo = bo;
-         brw->ib.size = bufferobj->Size;
-         brw_bo_reference(bo);
-      }
-   }
-
-   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
-    * the index buffer state when we're just moving the start index
-    * of our drawing.
-    */
-   brw->ib.start_vertex_offset = offset / ib_type_size;
-
-   if (brw->ib.bo != old_bo)
-      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-
-   unsigned index_size = 1 << index_buffer->index_size_shift;
-   if (index_size != brw->ib.index_size) {
-      brw->ib.index_size = index_size;
-      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-   }
-
-   /* We need to re-emit the index buffer state whenever the cut index
-    * flag is changed.
-    */
-   if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
-      brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
-      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-   }
-}
-
-const struct brw_tracked_state brw_indices = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_INDICES,
-   },
-   .emit = brw_upload_indices,
-};
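[Annotation: brw_indices above is one of the driver's tracked-state atoms: a table entry pairing dirty bits with an emit callback, walked during state upload. A minimal sketch of that pattern follows; the names are hypothetical and this is not the driver's actual upload loop.]

   #include <stdint.h>

   struct dirty_bits {
      uint64_t mesa; /* _NEW_* flags from core Mesa */
      uint64_t brw;  /* BRW_NEW_* driver-internal flags */
   };

   struct state_atom {
      struct dirty_bits dirty;
      void (*emit)(void *brw);
   };

   static void
   upload_dirty_state(void *brw, const struct dirty_bits *flagged,
                      const struct state_atom *atoms, unsigned n)
   {
      for (unsigned i = 0; i < n; i++) {
         /* An atom runs when any of its dirty bits is currently flagged. */
         if ((atoms[i].dirty.mesa & flagged->mesa) ||
             (atoms[i].dirty.brw & flagged->brw))
            atoms[i].emit(brw);
      }
   }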
diff --git a/src/mesa/drivers/dri/i965/brw_extensions.c b/src/mesa/drivers/dri/i965/brw_extensions.c
deleted file mode 100644
index 7f72799..0000000
--- a/src/mesa/drivers/dri/i965/brw_extensions.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/version.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-/**
- * Initializes the potential list of extensions if ctx == NULL, or actually
- * enables the extensions for a context.
- */
-void
-brw_init_extensions(struct gl_context *ctx)
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver >= 4);
-
-   ctx->Extensions.ARB_arrays_of_arrays = true;
-   ctx->Extensions.ARB_buffer_storage = true;
-   ctx->Extensions.ARB_clear_texture = true;
-   ctx->Extensions.ARB_clip_control = true;
-   ctx->Extensions.ARB_copy_image = true;
-   ctx->Extensions.ARB_depth_buffer_float = true;
-   ctx->Extensions.ARB_depth_clamp = true;
-   ctx->Extensions.ARB_depth_texture = true;
-   ctx->Extensions.ARB_draw_elements_base_vertex = true;
-   ctx->Extensions.ARB_draw_instanced = true;
-   ctx->Extensions.ARB_ES2_compatibility = true;
-   ctx->Extensions.ARB_explicit_attrib_location = true;
-   ctx->Extensions.ARB_explicit_uniform_location = true;
-   ctx->Extensions.ARB_fragment_coord_conventions = true;
-   ctx->Extensions.ARB_fragment_program = true;
-   ctx->Extensions.ARB_fragment_program_shadow = true;
-   ctx->Extensions.ARB_fragment_shader = true;
-   ctx->Extensions.ARB_framebuffer_object = true;
-   ctx->Extensions.ARB_half_float_vertex = true;
-   ctx->Extensions.ARB_instanced_arrays = true;
-   ctx->Extensions.ARB_internalformat_query = true;
-   ctx->Extensions.ARB_internalformat_query2 = true;
-   ctx->Extensions.ARB_map_buffer_range = true;
-   ctx->Extensions.ARB_occlusion_query = true;
-   ctx->Extensions.ARB_occlusion_query2 = true;
-   ctx->Extensions.ARB_point_sprite = true;
-   ctx->Extensions.ARB_polygon_offset_clamp = true;
-   ctx->Extensions.ARB_seamless_cube_map = true;
-   ctx->Extensions.ARB_shader_bit_encoding = true;
-   ctx->Extensions.ARB_shader_draw_parameters = true;
-   ctx->Extensions.ARB_shader_group_vote = true;
-   ctx->Extensions.ARB_shader_texture_lod = true;
-   ctx->Extensions.ARB_shading_language_packing = true;
-   ctx->Extensions.ARB_shadow = true;
-   ctx->Extensions.ARB_sync = true;
-   ctx->Extensions.ARB_texture_border_clamp = true;
-   ctx->Extensions.ARB_texture_compression_rgtc = true;
-   ctx->Extensions.ARB_texture_cube_map = true;
-   ctx->Extensions.ARB_texture_env_combine = true;
-   ctx->Extensions.ARB_texture_env_crossbar = true;
-   ctx->Extensions.ARB_texture_env_dot3 = true;
-   ctx->Extensions.ARB_texture_filter_anisotropic = true;
-   ctx->Extensions.ARB_texture_float = true;
-   ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true;
-   ctx->Extensions.ARB_texture_non_power_of_two = true;
-   ctx->Extensions.ARB_texture_rg = true;
-   ctx->Extensions.ARB_texture_rgb10_a2ui = true;
-   ctx->Extensions.ARB_vertex_program = true;
-   ctx->Extensions.ARB_vertex_shader = true;
-   ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
-   ctx->Extensions.ARB_vertex_type_10f_11f_11f_rev = true;
-   ctx->Extensions.EXT_blend_color = true;
-   ctx->Extensions.EXT_blend_equation_separate = true;
-   ctx->Extensions.EXT_blend_func_separate = true;
-   ctx->Extensions.EXT_blend_minmax = true;
-   ctx->Extensions.EXT_color_buffer_half_float = true;
-   ctx->Extensions.EXT_draw_buffers2 = true;
-   ctx->Extensions.EXT_EGL_image_storage = true;
-   ctx->Extensions.EXT_float_blend = true;
-   ctx->Extensions.EXT_framebuffer_sRGB = true;
-   ctx->Extensions.EXT_gpu_program_parameters = true;
-   ctx->Extensions.EXT_packed_float = true;
-   ctx->Extensions.EXT_pixel_buffer_object = true;
-   ctx->Extensions.EXT_point_parameters = true;
-   ctx->Extensions.EXT_provoking_vertex = true;
-   ctx->Extensions.EXT_render_snorm = true;
-   ctx->Extensions.EXT_sRGB = true;
-   ctx->Extensions.EXT_stencil_two_side = true;
-   ctx->Extensions.EXT_texture_array = true;
-
ctx->Extensions.EXT_texture_env_dot3 = true; - ctx->Extensions.EXT_texture_filter_anisotropic = true; - ctx->Extensions.EXT_texture_integer = true; - ctx->Extensions.EXT_texture_norm16 = true; - ctx->Extensions.EXT_texture_shared_exponent = true; - ctx->Extensions.EXT_texture_snorm = true; - ctx->Extensions.EXT_texture_sRGB = true; - ctx->Extensions.EXT_texture_sRGB_decode = true; - ctx->Extensions.EXT_texture_sRGB_R8 = true; - ctx->Extensions.EXT_texture_swizzle = true; - ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true; - ctx->Extensions.EXT_vertex_array_bgra = true; - ctx->Extensions.KHR_robustness = true; - ctx->Extensions.AMD_seamless_cubemap_per_texture = true; - ctx->Extensions.APPLE_object_purgeable = true; - ctx->Extensions.ATI_texture_env_combine3 = true; - ctx->Extensions.MESA_framebuffer_flip_y = true; - ctx->Extensions.NV_conditional_render = true; - ctx->Extensions.NV_fog_distance = true; - ctx->Extensions.NV_primitive_restart = true; - ctx->Extensions.NV_texture_barrier = true; - ctx->Extensions.NV_texture_env_combine4 = true; - ctx->Extensions.NV_texture_rectangle = true; - ctx->Extensions.TDFX_texture_compression_FXT1 = true; - ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true; - ctx->Extensions.OES_draw_texture = true; - ctx->Extensions.OES_EGL_image = true; - ctx->Extensions.OES_EGL_image_external = true; - ctx->Extensions.OES_standard_derivatives = true; - ctx->Extensions.OES_texture_float = true; - ctx->Extensions.OES_texture_float_linear = true; - ctx->Extensions.OES_texture_half_float = true; - ctx->Extensions.OES_texture_half_float_linear = true; - - if (devinfo->ver >= 8) - ctx->Const.GLSLVersion = 460; - else if (devinfo->platform == INTEL_PLATFORM_HSW && - can_do_pipelined_register_writes(brw->screen)) - ctx->Const.GLSLVersion = 450; - else if (devinfo->ver >= 7 && can_do_pipelined_register_writes(brw->screen)) - ctx->Const.GLSLVersion = 420; - else if (devinfo->ver >= 6) - ctx->Const.GLSLVersion = 330; - else - ctx->Const.GLSLVersion = 120; - - if (devinfo->ver >= 6) - ctx->Const.GLSLVersionCompat = 130; - else - ctx->Const.GLSLVersionCompat = 120; - - _mesa_override_glsl_version(&ctx->Const); - - ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130; - ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 130; - - if (devinfo->verx10 >= 45) { - ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true; - ctx->Extensions.KHR_blend_equation_advanced = true; - } - - if (devinfo->ver >= 5) { - ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130; - ctx->Extensions.ARB_texture_query_lod = true; - ctx->Extensions.EXT_timer_query = true; - } - - if (devinfo->ver == 6) - ctx->Extensions.ARB_transform_feedback2 = true; - - if (devinfo->ver >= 6) { - ctx->Extensions.ARB_blend_func_extended = - !driQueryOptionb(&brw->screen->optionCache, "disable_blend_func_extended"); - ctx->Extensions.ARB_conditional_render_inverted = true; - ctx->Extensions.ARB_cull_distance = true; - ctx->Extensions.ARB_draw_buffers_blend = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.ARB_enhanced_layouts = true; - ctx->Extensions.ARB_ES3_compatibility = true; - ctx->Extensions.ARB_fragment_layer_viewport = true; - ctx->Extensions.ARB_pipeline_statistics_query = true; - ctx->Extensions.ARB_sample_shading = true; - ctx->Extensions.ARB_shading_language_420pack = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) { - 
ctx->Extensions.ARB_texture_buffer_object = true; - ctx->Extensions.ARB_texture_buffer_object_rgb32 = true; - ctx->Extensions.ARB_texture_buffer_range = true; - } - ctx->Extensions.ARB_texture_cube_map_array = true; - ctx->Extensions.ARB_texture_gather = true; - ctx->Extensions.ARB_texture_multisample = true; - ctx->Extensions.ARB_uniform_buffer_object = true; - ctx->Extensions.EXT_gpu_shader4 = true; - ctx->Extensions.EXT_texture_shadow_lod = true; - - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.AMD_vertex_shader_layer = true; - ctx->Extensions.EXT_framebuffer_multisample = true; - ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true; - ctx->Extensions.EXT_transform_feedback = true; - ctx->Extensions.ARB_transform_feedback_overflow_query = true; - ctx->Extensions.OES_depth_texture_cube_map = true; - ctx->Extensions.OES_sample_variables = true; - - ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp; - ctx->Extensions.EXT_disjoint_timer_query = - ctx->Extensions.ARB_timer_query; - - /* Only enable this in core profile because geometry shaders are - * required, and Mesa only supports geometry shaders in OpenGL 3.2 and - * later. In this driver, that currently means Core profile. - */ - if (ctx->API == API_OPENGL_CORE || - ctx->Const.AllowHigherCompatVersion) { - ctx->Extensions.ARB_shader_viewport_layer_array = true; - ctx->Extensions.ARB_viewport_array = true; - ctx->Extensions.AMD_vertex_shader_viewport_index = true; - } - } - - brw->predicate.supported = false; - - if (devinfo->ver >= 7) { - ctx->Extensions.ARB_conservative_depth = true; - ctx->Extensions.ARB_derivative_control = true; - ctx->Extensions.ARB_framebuffer_no_attachments = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) { - ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.ARB_gpu_shader_fp64 = true; - } - ctx->Extensions.ARB_shader_atomic_counters = true; - ctx->Extensions.ARB_shader_atomic_counter_ops = true; - ctx->Extensions.ARB_shader_clock = true; - ctx->Extensions.ARB_shader_image_load_store = true; - ctx->Extensions.ARB_shader_image_size = true; - ctx->Extensions.ARB_shader_precision = true; - ctx->Extensions.ARB_shader_texture_image_samples = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.ARB_tessellation_shader = true; - ctx->Extensions.ARB_texture_compression_bptc = true; - ctx->Extensions.ARB_texture_view = true; - ctx->Extensions.ARB_shader_storage_buffer_object = true; - ctx->Extensions.ARB_vertex_attrib_64bit = true; - ctx->Extensions.EXT_shader_samples_identical = true; - ctx->Extensions.OES_primitive_bounding_box = true; - ctx->Extensions.OES_texture_buffer = true; - - if (can_do_pipelined_register_writes(brw->screen)) { - ctx->Extensions.ARB_draw_indirect = true; - ctx->Extensions.ARB_transform_feedback2 = true; - ctx->Extensions.ARB_transform_feedback3 = true; - ctx->Extensions.ARB_transform_feedback_instanced = true; - - if (can_do_compute_dispatch(brw->screen) && - ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) { - ctx->Extensions.ARB_compute_shader = true; - ctx->Extensions.ARB_ES3_1_compatibility = - devinfo->verx10 >= 75; - ctx->Extensions.NV_compute_shader_derivatives = true; - ctx->Extensions.ARB_compute_variable_group_size = true; - } - - if (can_do_predicate_writes(brw->screen)) { - brw->predicate.supported = true; - ctx->Extensions.ARB_indirect_parameters = true; - } - } - - ctx->Extensions.ARB_gl_spirv = true; - 
ctx->Extensions.ARB_spirv_extensions = true; - } - - if (devinfo->verx10 >= 75) { - ctx->Extensions.ARB_stencil_texturing = true; - ctx->Extensions.ARB_texture_stencil8 = true; - ctx->Extensions.OES_geometry_shader = true; - ctx->Extensions.OES_texture_cube_map_array = true; - ctx->Extensions.OES_viewport_array = true; - } - - if (devinfo->verx10 >= 75 || devinfo->platform == INTEL_PLATFORM_BYT) { - ctx->Extensions.ARB_robust_buffer_access_behavior = true; - } - - if (can_do_mi_math_and_lrr(brw->screen)) { - ctx->Extensions.ARB_query_buffer_object = true; - } - - if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) { - /* For now, we can't enable OES_texture_view on Gen 7 because of - * some piglit failures coming from - * piglit/tests/spec/arb_texture_view/rendering-formats.c that need - * investigation. - */ - ctx->Extensions.OES_texture_view = true; - } - - if (devinfo->ver >= 7) { - /* We can safely enable OES_copy_image on Gen 7, since we emulate - * the ETC2 support using the shadow_miptree to store the - * compressed data. - */ - ctx->Extensions.OES_copy_image = true; - } - - /* Gen < 6 still uses the blitter. It's somewhat annoying to add support - * for blackhole there... Does anybody actually care anymore anyway? - */ - if (devinfo->ver >= 6) - ctx->Extensions.INTEL_blackhole_render = true; - - if (devinfo->ver >= 8) { - ctx->Extensions.ARB_gpu_shader_int64 = true; - /* requires ARB_gpu_shader_int64 */ - ctx->Extensions.ARB_shader_ballot = true; - ctx->Extensions.ARB_ES3_2_compatibility = true; - - /* Currently only implemented in the scalar backend, so only enable for - * Gfx8+. Eventually Gfx6+ could be supported. - */ - ctx->Extensions.INTEL_shader_integer_functions2 = true; - } - - if (devinfo->ver >= 9) { - ctx->Extensions.ANDROID_extension_pack_es31a = true; - ctx->Extensions.AMD_depth_clamp_separate = true; - ctx->Extensions.ARB_post_depth_coverage = true; - ctx->Extensions.ARB_shader_stencil_export = true; - ctx->Extensions.EXT_shader_framebuffer_fetch = true; - ctx->Extensions.INTEL_conservative_rasterization = true; - ctx->Extensions.INTEL_shader_atomic_float_minmax = true; - ctx->Extensions.KHR_blend_equation_advanced_coherent = true; - ctx->Extensions.KHR_texture_compression_astc_ldr = true; - ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true; - - /* - * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221): - * "A memory fence message issued by a thread causes further messages - * issued by the thread to be blocked until all previous data port - * messages have completed, or the results can be globally observed from - * the point of view of other threads in the system." - * - * From the Haswell PRM Vol. 7 (Memory Fence, page 256): - * "A memory fence message issued by a thread causes further messages - * issued by the thread to be blocked until all previous messages issued - * by the thread to that data port (data cache or render cache) have - * been globally observed from the point of view of other threads in the - * system." - * - * Summarized: For ARB_fragment_shader_interlock to work, we need to - * ensure memory access ordering for all messages to the dataport from - * all threads. Memory fence messages prior to SKL only provide memory - * access ordering for messages from the same thread, so we can only - * support the feature from Gfx9 onwards. 
- * - */ - - ctx->Extensions.ARB_fragment_shader_interlock = true; - } - - if (intel_device_info_is_9lp(devinfo)) - ctx->Extensions.KHR_texture_compression_astc_hdr = true; - - if (devinfo->ver >= 6) - ctx->Extensions.INTEL_performance_query = true; - - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.ARB_base_instance = true; - if (ctx->API != API_OPENGL_CORE) - ctx->Extensions.ARB_color_buffer_float = true; - - ctx->Extensions.EXT_texture_compression_s3tc = true; - ctx->Extensions.EXT_texture_compression_s3tc_srgb = true; - ctx->Extensions.ANGLE_texture_compression_dxt = true; - - ctx->Extensions.EXT_demote_to_helper_invocation = true; - - ctx->Const.PrimitiveRestartFixedIndex = true; - - if (devinfo->ver >= 7) { - ctx->Extensions.EXT_memory_object_fd = true; - ctx->Extensions.EXT_memory_object = true; - ctx->Extensions.EXT_semaphore = true; - ctx->Extensions.EXT_semaphore_fd = true; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_fbo.c b/src/mesa/drivers/dri/i965/brw_fbo.c deleted file mode 100644 index ff30385..0000000 --- a/src/mesa/drivers/dri/i965/brw_fbo.c +++ /dev/null @@ -1,1139 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/enums.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/context.h" -#include "main/teximage.h" -#include "main/image.h" -#include "main/condrender.h" -#include "util/hash_table.h" -#include "util/set.h" -#include "util/u_memory.h" - -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_blit.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_image.h" -#include "brw_screen.h" -#include "brw_tex.h" -#include "brw_context.h" -#include "brw_defines.h" - -#define FILE_DEBUG_FLAG DEBUG_FBO - -/** Called by gl_renderbuffer::Delete() */ -static void -brw_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - assert(irb); - - brw_miptree_release(&irb->mt); - brw_miptree_release(&irb->singlesample_mt); - - _mesa_delete_renderbuffer(ctx, rb); -} - -/** - * \brief Downsample a winsys renderbuffer from mt to singlesample_mt. - * - * If the miptree needs no downsample, then skip. 
- */
-void
-brw_renderbuffer_downsample(struct brw_context *brw,
-                            struct brw_renderbuffer *irb)
-{
-   if (!irb->need_downsample)
-      return;
-   brw_miptree_updownsample(brw, irb->mt, irb->singlesample_mt);
-   irb->need_downsample = false;
-}
-
-/**
- * \brief Upsample a winsys renderbuffer from singlesample_mt to mt.
- *
- * The upsample is done unconditionally.
- */
-void
-brw_renderbuffer_upsample(struct brw_context *brw,
-                          struct brw_renderbuffer *irb)
-{
-   assert(!irb->need_downsample);
-
-   brw_miptree_updownsample(brw, irb->singlesample_mt, irb->mt);
-}
-
-/**
- * \see dd_function_table::MapRenderbuffer
- */
-static void
-brw_map_renderbuffer(struct gl_context *ctx,
-                     struct gl_renderbuffer *rb,
-                     GLuint x, GLuint y, GLuint w, GLuint h,
-                     GLbitfield mode,
-                     GLubyte **out_map,
-                     GLint *out_stride,
-                     bool flip_y)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
-   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-   struct brw_mipmap_tree *mt;
-   void *map;
-   ptrdiff_t stride;
-
-   if (srb->Buffer) {
-      /* this is a malloc'd renderbuffer (accum buffer), not an irb */
-      GLint bpp = _mesa_get_format_bytes(rb->Format);
-      GLint rowStride = srb->RowStride;
-      *out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp;
-      *out_stride = rowStride;
-      return;
-   }
-
-   brw_prepare_render(brw);
-
-   /* The MapRenderbuffer API should always return a single-sampled mapping.
-    * The case where we are asked to map multisampled RBs is glReadPixels()
-    * (or swrast paths like glCopyTexImage()) from a window-system MSAA
-    * buffer, and GL expects an automatic resolve to happen.
-    *
-    * If it's a color miptree, there is a ->singlesample_mt which wraps the
-    * actual window system renderbuffer (which we may resolve to at any time),
-    * while the miptree itself is our driver-private allocation. If it's a
-    * depth or stencil miptree, we have a private MSAA buffer and no shared
-    * singlesample buffer, and since we don't expect anybody to ever actually
-    * resolve it, we just make a temporary singlesample buffer now when we
-    * have to.
-    */
-   if (rb->NumSamples > 1) {
-      if (!irb->singlesample_mt) {
-         irb->singlesample_mt =
-            brw_miptree_create_for_renderbuffer(brw, irb->mt->format,
-                                                rb->Width, rb->Height,
-                                                1 /*num_samples*/);
-         if (!irb->singlesample_mt)
-            goto fail;
-         irb->singlesample_mt_is_tmp = true;
-         irb->need_downsample = true;
-      }
-
-      brw_renderbuffer_downsample(brw, irb);
-      mt = irb->singlesample_mt;
-
-      irb->need_map_upsample = mode & GL_MAP_WRITE_BIT;
-   } else {
-      mt = irb->mt;
-   }
-
-   /* For a window-system renderbuffer, we need to flip the mapping we receive
-    * upside-down. So we need to ask for a rectangle flipped vertically, and
-    * we then return a pointer to the bottom of it with a negative stride.
- */ - if (flip_y) { - y = rb->Height - y - h; - } - - brw_miptree_map(brw, mt, irb->mt_level, irb->mt_layer, - x, y, w, h, mode, &map, &stride); - - if (flip_y) { - map += (h - 1) * stride; - stride = -stride; - } - - DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%"PRIdPTR"\n", - __func__, rb->Name, _mesa_get_format_name(rb->Format), - x, y, w, h, map, stride); - - *out_map = map; - *out_stride = stride; - return; - -fail: - *out_map = NULL; - *out_stride = 0; -} - -/** - * \see dd_function_table::UnmapRenderbuffer - */ -static void -brw_unmap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - struct brw_context *brw = brw_context(ctx); - struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_mipmap_tree *mt; - - DBG("%s: rb %d (%s)\n", __func__, - rb->Name, _mesa_get_format_name(rb->Format)); - - if (srb->Buffer) { - /* this is a malloc'd renderbuffer (accum buffer) */ - /* nothing to do */ - return; - } - - if (rb->NumSamples > 1) { - mt = irb->singlesample_mt; - } else { - mt = irb->mt; - } - - brw_miptree_unmap(brw, mt, irb->mt_level, irb->mt_layer); - - if (irb->need_map_upsample) { - brw_renderbuffer_upsample(brw, irb); - irb->need_map_upsample = false; - } - - if (irb->singlesample_mt_is_tmp) - brw_miptree_release(&irb->singlesample_mt); -} - - -/** - * Round up the requested multisample count to the next supported sample size. - */ -unsigned -brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples) -{ - const int *msaa_modes = brw_supported_msaa_modes(intel); - int quantized_samples = 0; - - for (int i = 0; msaa_modes[i] != -1; ++i) { - if (msaa_modes[i] >= num_samples) - quantized_samples = msaa_modes[i]; - else - break; - } - - return quantized_samples; -} - -static mesa_format -brw_renderbuffer_format(struct gl_context * ctx, GLenum internalFormat) -{ - struct brw_context *brw = brw_context(ctx); - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - switch (internalFormat) { - default: - /* Use the same format-choice logic as for textures. - * Renderbuffers aren't any different from textures for us, - * except they're less useful because you can't texture with - * them. - */ - return ctx->Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D, - internalFormat, - GL_NONE, GL_NONE); - break; - case GL_STENCIL_INDEX: - case GL_STENCIL_INDEX1_EXT: - case GL_STENCIL_INDEX4_EXT: - case GL_STENCIL_INDEX8_EXT: - case GL_STENCIL_INDEX16_EXT: - /* These aren't actual texture formats, so force them here. 
*/
-      if (brw->has_separate_stencil) {
-         return MESA_FORMAT_S_UINT8;
-      } else {
-         assert(!devinfo->must_use_separate_stencil);
-         return MESA_FORMAT_Z24_UNORM_S8_UINT;
-      }
-   }
-}
-
-static GLboolean
-brw_alloc_private_renderbuffer_storage(struct gl_context *ctx,
-                                       struct gl_renderbuffer *rb,
-                                       GLenum internalFormat,
-                                       GLuint width, GLuint height)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_screen *screen = brw->screen;
-   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
-   assert(rb->Format != MESA_FORMAT_NONE);
-
-   rb->NumSamples = brw_quantize_num_samples(screen, rb->NumSamples);
-   rb->NumStorageSamples = rb->NumSamples;
-   rb->Width = width;
-   rb->Height = height;
-   rb->_BaseFormat = _mesa_get_format_base_format(rb->Format);
-
-   brw_miptree_release(&irb->mt);
-
-   DBG("%s: %s: %s (%dx%d)\n", __func__,
-       _mesa_enum_to_string(internalFormat),
-       _mesa_get_format_name(rb->Format), width, height);
-
-   if (width == 0 || height == 0)
-      return true;
-
-   irb->mt = brw_miptree_create_for_renderbuffer(brw, rb->Format,
-                                                 width, height,
-                                                 MAX2(rb->NumSamples, 1));
-   if (!irb->mt)
-      return false;
-
-   irb->layer_count = 1;
-
-   return true;
-}
-
-/**
- * Called via glRenderbufferStorageEXT() to set the format and allocate
- * storage for a user-created renderbuffer.
- */
-static GLboolean
-brw_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
-                               GLenum internalFormat,
-                               GLuint width, GLuint height)
-{
-   rb->Format = brw_renderbuffer_format(ctx, internalFormat);
-   return brw_alloc_private_renderbuffer_storage(ctx, rb, internalFormat, width, height);
-}
-
-static mesa_format
-fallback_rgbx_to_rgba(struct brw_screen *screen, struct gl_renderbuffer *rb,
-                      mesa_format original_format)
-{
-   mesa_format format = original_format;
-
-   /* The base format and internal format must be derived from the
-    * user-visible format (that is, the gl_config's format), even if we
-    * internally choose a different format for the renderbuffer. Otherwise,
-    * rendering may use incorrect channel write masks.
-    */
-   rb->_BaseFormat = _mesa_get_format_base_format(original_format);
-   rb->InternalFormat = rb->_BaseFormat;
-
-   if (!screen->mesa_format_supports_render[original_format]) {
-      /* The glRenderbufferStorage paths in core Mesa detect if the driver
-       * does not support the user-requested format, and then search for
-       * a fallback format. The DRI code bypasses core Mesa, though. So we
-       * do the fallbacks here.
-       *
-       * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android
-       * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces.
-       */
-      format = _mesa_format_fallback_rgbx_to_rgba(original_format);
-      assert(screen->mesa_format_supports_render[format]);
-   }
-   return format;
-}
-
-static void
-brw_image_target_renderbuffer_storage(struct gl_context *ctx,
-                                      struct gl_renderbuffer *rb,
-                                      void *image_handle)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_renderbuffer *irb;
-   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
-   __DRIimage *image;
-
-   image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle,
-                                                  dri_screen->loaderPrivate);
-   if (image == NULL)
-      return;
-
-   if (image->planar_format && image->planar_format->nplanes > 1) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glEGLImageTargetRenderbufferStorage(planar buffers are not "
-                  "supported as render targets.)");
-      return;
-   }
-
-   rb->Format = fallback_rgbx_to_rgba(brw->screen, rb, image->format);
-
-   mesa_format chosen_format = rb->Format == image->format ?
- image->format : rb->Format; - - /* __DRIimage is opaque to the core so it has to be checked here */ - if (!brw->mesa_format_supports_render[chosen_format]) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetRenderbufferStorage(unsupported image format)"); - return; - } - - irb = brw_renderbuffer(rb); - brw_miptree_release(&irb->mt); - - /* Disable creation of the miptree's aux buffers because the driver exposes - * no EGL API to manage them. That is, there is no API for resolving the aux - * buffer's content to the main buffer nor for invalidating the aux buffer's - * content. - */ - irb->mt = brw_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D, - rb->Format, false); - if (!irb->mt) - return; - - rb->Width = image->width; - rb->Height = image->height; - rb->NeedsFinishRenderTexture = true; - irb->layer_count = 1; -} - -/** - * Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a - * window system framebuffer is resized. - * - * Any actual buffer reallocations for hardware renderbuffers (which would - * have triggered _mesa_resize_framebuffer()) were done by - * brw_process_dri2_buffer(). - */ -static GLboolean -brw_alloc_window_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLenum internalFormat, GLuint width, GLuint height) -{ - (void) ctx; - assert(rb->Name == 0); - rb->Width = width; - rb->Height = height; - rb->InternalFormat = internalFormat; - - return true; -} - -/** Dummy function for gl_renderbuffer::AllocStorage() */ -static GLboolean -brw_nop_alloc_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLenum internalFormat, GLuint width, GLuint height) -{ - (void) rb; - (void) internalFormat; - (void) width; - (void) height; - _mesa_problem(ctx, "brw_nop_alloc_storage should never be called."); - return false; -} - -/** - * Create an brw_renderbuffer for a __DRIdrawable. This function is - * unrelated to GL renderbuffers (that is, those created by - * glGenRenderbuffers). - * - * \param num_samples must be quantized. - */ -struct brw_renderbuffer * -brw_create_winsys_renderbuffer(struct brw_screen *screen, - mesa_format format, unsigned num_samples) -{ - struct brw_renderbuffer *irb = CALLOC_STRUCT(brw_renderbuffer); - if (!irb) - return NULL; - - struct gl_renderbuffer *rb = &irb->Base.Base; - irb->layer_count = 1; - - _mesa_init_renderbuffer(rb, 0); - rb->ClassID = INTEL_RB_CLASS; - rb->NumSamples = num_samples; - rb->NumStorageSamples = num_samples; - - rb->Format = fallback_rgbx_to_rgba(screen, rb, format); - - /* intel-specific methods */ - rb->Delete = brw_delete_renderbuffer; - rb->AllocStorage = brw_alloc_window_storage; - - return irb; -} - -/** - * Private window-system buffers (as opposed to ones shared with the display - * server created with brw_create_winsys_renderbuffer()) are most similar in their - * handling to user-created renderbuffers, but they have a resize handler that - * may be called at brw_update_renderbuffers() time. - * - * \param num_samples must be quantized. - */ -struct brw_renderbuffer * -brw_create_private_renderbuffer(struct brw_screen *screen, - mesa_format format, unsigned num_samples) -{ - struct brw_renderbuffer *irb; - - irb = brw_create_winsys_renderbuffer(screen, format, num_samples); - irb->Base.Base.AllocStorage = brw_alloc_private_renderbuffer_storage; - - return irb; -} - -/** - * Create a new renderbuffer object. - * Typically called via glBindRenderbufferEXT(). 
- */ -static struct gl_renderbuffer * -brw_new_renderbuffer(struct gl_context *ctx, GLuint name) -{ - struct brw_renderbuffer *irb; - struct gl_renderbuffer *rb; - - irb = CALLOC_STRUCT(brw_renderbuffer); - if (!irb) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); - return NULL; - } - - rb = &irb->Base.Base; - - _mesa_init_renderbuffer(rb, name); - rb->ClassID = INTEL_RB_CLASS; - - /* intel-specific methods */ - rb->Delete = brw_delete_renderbuffer; - rb->AllocStorage = brw_alloc_renderbuffer_storage; - /* span routines set in alloc_storage function */ - - return rb; -} - -static bool -brw_renderbuffer_update_wrapper(struct brw_context *brw, - struct brw_renderbuffer *irb, - struct gl_texture_image *image, - uint32_t layer, - bool layered) -{ - struct gl_renderbuffer *rb = &irb->Base.Base; - struct brw_texture_image *intel_image = brw_texture_image(image); - struct brw_mipmap_tree *mt = intel_image->mt; - int level = image->Level; - - rb->AllocStorage = brw_nop_alloc_storage; - - /* adjust for texture view parameters */ - layer += image->TexObject->Attrib.MinLayer; - level += image->TexObject->Attrib.MinLevel; - - brw_miptree_check_level_layer(mt, level, layer); - irb->mt_level = level; - irb->mt_layer = layer; - - if (!layered) { - irb->layer_count = 1; - } else if (mt->target != GL_TEXTURE_3D && image->TexObject->Attrib.NumLayers > 0) { - irb->layer_count = image->TexObject->Attrib.NumLayers; - } else { - irb->layer_count = mt->surf.dim == ISL_SURF_DIM_3D ? - minify(mt->surf.logical_level0_px.depth, level) : - mt->surf.logical_level0_px.array_len; - } - - brw_miptree_reference(&irb->mt, mt); - - brw_renderbuffer_set_draw_offset(irb); - - return true; -} - -void -brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb) -{ - unsigned int dst_x, dst_y; - - /* compute offset of the particular 2D image within the texture region */ - brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer, - &dst_x, &dst_y); - - irb->draw_x = dst_x; - irb->draw_y = dst_y; -} - -/** - * Called by glFramebufferTexture[123]DEXT() (and other places) to - * prepare for rendering into texture memory. This might be called - * many times to choose different texture levels, cube faces, etc - * before brw_finish_render_texture() is ever called. - */ -static void -brw_render_texture(struct gl_context * ctx, - struct gl_framebuffer *fb, - struct gl_renderbuffer_attachment *att) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_renderbuffer *rb = att->Renderbuffer; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct gl_texture_image *image = rb->TexImage; - struct brw_texture_image *intel_image = brw_texture_image(image); - struct brw_mipmap_tree *mt = intel_image->mt; - int layer; - - (void) fb; - - if (att->CubeMapFace > 0) { - assert(att->Zoffset == 0); - layer = att->CubeMapFace; - } else { - layer = att->Zoffset; - } - - if (!intel_image->mt) { - /* Fallback on drawing to a texture that doesn't have a miptree - * (has a border, width/height 0, etc.) - */ - _swrast_render_texture(ctx, fb, att); - return; - } - - brw_miptree_check_level_layer(mt, att->TextureLevel, layer); - - if (!brw_renderbuffer_update_wrapper(brw, irb, image, layer, att->Layered)) { - _swrast_render_texture(ctx, fb, att); - return; - } - - DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n", - _mesa_get_format_name(image->TexFormat), - att->Texture->Name, image->Width, image->Height, image->Depth, - rb->RefCount); -} - - -#define fbo_incomplete(fb, error_id, ...) 
do { \
-      static GLuint msg_id = 0; \
-      if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \
-         _mesa_gl_debugf(ctx, &msg_id, \
-                         MESA_DEBUG_SOURCE_API, \
-                         MESA_DEBUG_TYPE_OTHER, \
-                         MESA_DEBUG_SEVERITY_MEDIUM, \
-                         __VA_ARGS__); \
-      } \
-      DBG(__VA_ARGS__); \
-      fb->_Status = error_id; \
-   } while (0)
-
-/**
- * Do additional "completeness" testing of a framebuffer object.
- */
-static void
-brw_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_renderbuffer *depthRb =
-      brw_get_renderbuffer(fb, BUFFER_DEPTH);
-   struct brw_renderbuffer *stencilRb =
-      brw_get_renderbuffer(fb, BUFFER_STENCIL);
-   struct brw_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
-   unsigned i;
-
-   DBG("%s() on fb %p (%s)\n", __func__,
-       fb, (fb == ctx->DrawBuffer ? "drawbuffer" :
-            (fb == ctx->ReadBuffer ? "readbuffer" : "other buffer")));
-
-   if (depthRb)
-      depth_mt = depthRb->mt;
-   if (stencilRb) {
-      stencil_mt = stencilRb->mt;
-      if (stencil_mt->stencil_mt)
-         stencil_mt = stencil_mt->stencil_mt;
-   }
-
-   if (depth_mt && stencil_mt) {
-      if (devinfo->ver >= 6) {
-         const unsigned d_width = depth_mt->surf.phys_level0_sa.width;
-         const unsigned d_height = depth_mt->surf.phys_level0_sa.height;
-         const unsigned d_depth = depth_mt->surf.dim == ISL_SURF_DIM_3D ?
-            depth_mt->surf.phys_level0_sa.depth :
-            depth_mt->surf.phys_level0_sa.array_len;
-
-         const unsigned s_width = stencil_mt->surf.phys_level0_sa.width;
-         const unsigned s_height = stencil_mt->surf.phys_level0_sa.height;
-         const unsigned s_depth = stencil_mt->surf.dim == ISL_SURF_DIM_3D ?
-            stencil_mt->surf.phys_level0_sa.depth :
-            stencil_mt->surf.phys_level0_sa.array_len;
-
-         /* For gen >= 6, we are using the lod/minimum-array-element fields
-          * and supporting layered rendering. This means that we must restrict
-          * the depth & stencil attachments to match in various more
-          * restrictive ways. (width, height, depth, LOD and layer)
-          */
-         if (d_width != s_width ||
-             d_height != s_height ||
-             d_depth != s_depth ||
-             depthRb->mt_level != stencilRb->mt_level ||
-             depthRb->mt_layer != stencilRb->mt_layer) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: depth and stencil must match in "
-                           "width, height, depth, LOD and layer\n");
-         }
-      }
-      if (depth_mt == stencil_mt) {
-         /* For true packed depth/stencil (not faked on prefers-separate-stencil
-          * hardware) we need to be sure they're the same level/layer, since
-          * we'll be emitting a single packet describing the packed setup.
-          */
-         if (depthRb->mt_level != stencilRb->mt_level ||
-             depthRb->mt_layer != stencilRb->mt_layer) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: depth image level/layer %d/%d != "
-                           "stencil image %d/%d\n",
-                           depthRb->mt_level,
-                           depthRb->mt_layer,
-                           stencilRb->mt_level,
-                           stencilRb->mt_layer);
-         }
-      } else {
-         if (!brw->has_separate_stencil) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: separate stencil unsupported\n");
-         }
-         if (stencil_mt->format != MESA_FORMAT_S_UINT8) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: separate stencil is %s "
-                           "instead of S8\n",
-                           _mesa_get_format_name(stencil_mt->format));
-         }
-         if (devinfo->ver < 7 && !brw_renderbuffer_has_hiz(depthRb)) {
-            /* Before Gfx7, separate depth and stencil buffers can be used
-             * only if HiZ is enabled.
From the Sandybridge PRM, Volume 2, - * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable: - * [DevSNB]: This field must be set to the same value (enabled - * or disabled) as Hierarchical Depth Buffer Enable. - */ - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: separate stencil without HiZ\n"); - } - } - } - - for (i = 0; i < ARRAY_SIZE(fb->Attachment); i++) { - struct gl_renderbuffer *rb; - struct brw_renderbuffer *irb; - - if (fb->Attachment[i].Type == GL_NONE) - continue; - - /* A supported attachment will have a Renderbuffer set either - * from being a Renderbuffer or being a texture that got the - * brw_wrap_texture() treatment. - */ - rb = fb->Attachment[i].Renderbuffer; - if (rb == NULL) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: attachment without " - "renderbuffer\n"); - continue; - } - - if (fb->Attachment[i].Type == GL_TEXTURE) { - if (rb->TexImage->Border) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: texture with border\n"); - continue; - } - } - - irb = brw_renderbuffer(rb); - if (irb == NULL) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: software rendering renderbuffer\n"); - continue; - } - - if (rb->Format == MESA_FORMAT_R_SRGB8) { - fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT, - "FBO incomplete: Format not color renderable: %s\n", - _mesa_get_format_name(rb->Format)); - continue; - } - - if (!brw_render_target_supported(brw, rb)) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: Unsupported HW " - "texture/renderbuffer format attached: %s\n", - _mesa_get_format_name(brw_rb_format(irb))); - } - } -} - -/** - * Try to do a glBlitFramebuffer using glCopyTexSubImage2D - * We can do this when the dst renderbuffer is actually a texture and - * there is no scaling, mirroring or scissoring. - * - * \return new buffer mask indicating the buffers left to blit using the - * normal path. - */ -static GLbitfield -brw_blit_framebuffer_with_blitter(struct gl_context *ctx, - const struct gl_framebuffer *readFb, - const struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, - GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, - GLint dstX1, GLint dstY1, - GLbitfield mask) -{ - struct brw_context *brw = brw_context(ctx); - - /* Sync up the state of window system buffers. We need to do this before - * we go looking for the buffers. - */ - brw_prepare_render(brw); - - if (mask & GL_COLOR_BUFFER_BIT) { - unsigned i; - struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; - struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb); - - if (!src_irb) { - perf_debug("glBlitFramebuffer(): missing src renderbuffer. " - "Falling back to software rendering.\n"); - return mask; - } - - /* If the source and destination are the same size with no mirroring, - * the rectangles are within the size of the texture and there is no - * scissor, then we can probably use the blit engine. - */ - if (!(srcX0 - srcX1 == dstX0 - dstX1 && - srcY0 - srcY1 == dstY0 - dstY1 && - srcX1 >= srcX0 && - srcY1 >= srcY0 && - srcX0 >= 0 && srcX1 <= readFb->Width && - srcY0 >= 0 && srcY1 <= readFb->Height && - dstX0 >= 0 && dstX1 <= drawFb->Width && - dstY0 >= 0 && dstY1 <= drawFb->Height && - !(ctx->Scissor.EnableFlags))) { - perf_debug("glBlitFramebuffer(): non-1:1 blit. " - "Falling back to software rendering.\n"); - return mask; - } - - /* Blit to all active draw buffers. 
We don't do any pre-checking, - * because we assume that copying to MRTs is rare, and failure midway - * through copying is even more rare. Even if it was to occur, it's - * safe to let meta start the copy over from scratch, because - * glBlitFramebuffer completely overwrites the destination pixels, and - * results are undefined if any destination pixels have a dependency on - * source pixels. - */ - for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *dst_rb = drawFb->_ColorDrawBuffers[i]; - struct brw_renderbuffer *dst_irb = brw_renderbuffer(dst_rb); - - if (!dst_irb) { - perf_debug("glBlitFramebuffer(): missing dst renderbuffer. " - "Falling back to software rendering.\n"); - return mask; - } - - if (ctx->Color.sRGBEnabled && - _mesa_is_format_srgb(src_irb->mt->format) != - _mesa_is_format_srgb(dst_irb->mt->format)) { - perf_debug("glBlitFramebuffer() with sRGB conversion cannot be " - "handled by BLT path.\n"); - return mask; - } - - if (!brw_miptree_blit(brw, - src_irb->mt, - src_irb->mt_level, src_irb->mt_layer, - srcX0, srcY0, readFb->FlipY, - dst_irb->mt, - dst_irb->mt_level, dst_irb->mt_layer, - dstX0, dstY0, drawFb->FlipY, - dstX1 - dstX0, dstY1 - dstY0, - COLOR_LOGICOP_COPY)) { - perf_debug("glBlitFramebuffer(): unknown blit failure. " - "Falling back to software rendering.\n"); - return mask; - } - } - - mask &= ~GL_COLOR_BUFFER_BIT; - } - - return mask; -} - -static void -brw_blit_framebuffer(struct gl_context *ctx, - struct gl_framebuffer *readFb, - struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Page 679 of OpenGL 4.4 spec says: - * "Added BlitFramebuffer to commands affected by conditional rendering in - * section 10.10 (Bug 9562)." - */ - if (!_mesa_check_conditional_render(ctx)) - return; - - if (devinfo->ver < 6) { - /* On gfx4-5, try BLT first. - * - * Gfx4-5 have a single ring for both 3D and BLT operations, so there's - * no inter-ring synchronization issues like on Gfx6+. It is apparently - * faster than using the 3D pipeline. Original Gfx4 also has to rebase - * and copy miptree slices in order to render to unaligned locations. - */ - mask = brw_blit_framebuffer_with_blitter(ctx, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask); - if (mask == 0x0) - return; - } - - mask = brw_blorp_framebuffer(brw, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); - if (mask == 0x0) - return; - - /* brw_blorp_framebuffer should always be successful for color blits. */ - assert(!(mask & GL_COLOR_BUFFER_BIT)); - - mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); - if (mask == 0x0) - return; - - if (devinfo->ver >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) { - assert(!"Invalid blit"); - } - - _swrast_BlitFramebuffer(ctx, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); -} - -/** - * Does the renderbuffer have hiz enabled? 
- */ -bool -brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb) -{ - return brw_miptree_level_has_hiz(irb->mt, irb->mt_level); -} - -void -brw_renderbuffer_move_to_temp(struct brw_context *brw, - struct brw_renderbuffer *irb, - bool invalidate) -{ - struct gl_renderbuffer *rb =&irb->Base.Base; - struct brw_texture_image *intel_image = brw_texture_image(rb->TexImage); - struct brw_mipmap_tree *new_mt; - int width, height, depth; - - brw_get_image_dims(rb->TexImage, &width, &height, &depth); - - assert(irb->align_wa_mt == NULL); - new_mt = brw_miptree_create(brw, GL_TEXTURE_2D, - intel_image->base.Base.TexFormat, - 0, 0, - width, height, 1, - irb->mt->surf.samples, - MIPTREE_CREATE_BUSY); - - if (!invalidate) { - brw_miptree_copy_slice(brw, intel_image->mt, - intel_image->base.Base.Level, irb->mt_layer, - new_mt, 0, 0); - } - - brw_miptree_reference(&irb->align_wa_mt, new_mt); - brw_miptree_release(&new_mt); - - irb->draw_x = 0; - irb->draw_y = 0; -} - -void -brw_cache_sets_clear(struct brw_context *brw) -{ - hash_table_foreach(brw->render_cache, render_entry) - _mesa_hash_table_remove(brw->render_cache, render_entry); - - set_foreach(brw->depth_cache, depth_entry) - _mesa_set_remove(brw->depth_cache, depth_entry); -} - -/** - * Emits an appropriate flush for a BO if it has been rendered to within the - * same batchbuffer as a read that's about to be emitted. - * - * The GPU has separate, incoherent caches for the render cache and the - * sampler cache, along with other caches. Usually data in the different - * caches don't interact (e.g. we don't render to our driver-generated - * immediate constant data), but for render-to-texture in FBOs we definitely - * do. When a batchbuffer is flushed, the kernel will ensure that everything - * necessary is flushed before another use of that BO, but for reuse from - * different caches within a batchbuffer, it's all our responsibility. - */ -static void -flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE); - } else { - brw_emit_mi_flush(brw); - } - - brw_cache_sets_clear(brw); -} - -void -brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo) -{ - if (_mesa_hash_table_search(brw->render_cache, bo) || - _mesa_set_search(brw->depth_cache, bo)) - flush_depth_and_render_caches(brw, bo); -} - -static void * -format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) -{ - return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage); -} - -void -brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo, - enum isl_format format, - enum isl_aux_usage aux_usage) -{ - if (_mesa_set_search(brw->depth_cache, bo)) - flush_depth_and_render_caches(brw, bo); - - /* Check to see if this bo has been used by a previous rendering operation - * but with a different format or aux usage. If it has, flush the render - * cache so we ensure that it's only in there with one format or aux usage - * at a time. - * - * Even though it's not obvious, this can easily happen in practice. - * Suppose a client is blending on a surface with sRGB encode enabled on - * gfx9. This implies that you get AUX_USAGE_CCS_D at best. 
If the client - * then disables sRGB decode and continues blending we will flip on - * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is - * perfectly valid since CCS_E is a subset of CCS_D). However, this means - * that we have fragments in-flight which are rendering with UNORM+CCS_E - * and other fragments in-flight with SRGB+CCS_D on the same surface at the - * same time and the pixel scoreboard and color blender are trying to sort - * it all out. This ends badly (i.e. GPU hangs). - * - * To date, we have never observed GPU hangs or even corruption to be - * associated with switching the format, only the aux usage. However, - * there are comments in various docs which indicate that the render cache - * isn't 100% resilient to format changes. We may as well be conservative - * and flush on format changes too. We can always relax this later if we - * find it to be a performance problem. - */ - struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); - if (entry && entry->data != format_aux_tuple(format, aux_usage)) - flush_depth_and_render_caches(brw, bo); -} - -void -brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo, - enum isl_format format, - enum isl_aux_usage aux_usage) -{ -#ifndef NDEBUG - struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); - if (entry) { - /* Otherwise, someone didn't do a flush_for_render and that would be - * very bad indeed. - */ - assert(entry->data == format_aux_tuple(format, aux_usage)); - } -#endif - - _mesa_hash_table_insert(brw->render_cache, bo, - format_aux_tuple(format, aux_usage)); -} - -void -brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo) -{ - if (_mesa_hash_table_search(brw->render_cache, bo)) - flush_depth_and_render_caches(brw, bo); -} - -void -brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo) -{ - _mesa_set_add(brw->depth_cache, bo); -} - -/** - * Do one-time context initializations related to GL_EXT_framebuffer_object. - * Hook in device driver functions. - */ -void -brw_fbo_init(struct brw_context *brw) -{ - struct dd_function_table *dd = &brw->ctx.Driver; - dd->NewRenderbuffer = brw_new_renderbuffer; - dd->MapRenderbuffer = brw_map_renderbuffer; - dd->UnmapRenderbuffer = brw_unmap_renderbuffer; - dd->RenderTexture = brw_render_texture; - dd->ValidateFramebuffer = brw_validate_framebuffer; - dd->BlitFramebuffer = brw_blit_framebuffer; - dd->EGLImageTargetRenderbufferStorage = - brw_image_target_renderbuffer_storage; - - brw->render_cache = _mesa_hash_table_create(brw->mem_ctx, _mesa_hash_pointer, - _mesa_key_pointer_equal); - brw->depth_cache = _mesa_set_create(brw->mem_ctx, _mesa_hash_pointer, - _mesa_key_pointer_equal); - util_dynarray_init(&brw->batch.exec_fences, NULL); -} diff --git a/src/mesa/drivers/dri/i965/brw_fbo.h b/src/mesa/drivers/dri/i965/brw_fbo.h deleted file mode 100644 index b4dcb9a..0000000 --- a/src/mesa/drivers/dri/i965/brw_fbo.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. 
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_FBO_H
-#define BRW_FBO_H
-
-#include <stdbool.h>
-#include <assert.h>
-#include "main/formats.h"
-#include "main/macros.h"
-#include "brw_context.h"
-#include "brw_mipmap_tree.h"
-#include "brw_screen.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_mipmap_tree;
-
-/**
- * Intel renderbuffer, derived from gl_renderbuffer.
- */
-struct brw_renderbuffer
-{
-   struct swrast_renderbuffer Base;
-   /**
-    * The real renderbuffer storage.
-    *
-    * This is multisampled if NumSamples is > 1.
-    */
-   struct brw_mipmap_tree *mt;
-
-   /**
-    * Downsampled contents for window-system MSAA renderbuffers.
-    *
-    * For window system MSAA color buffers, the singlesample_mt is shared with
-    * other processes in DRI2 (and in DRI3, it's the image buffer managed by
-    * glx_dri3.c), while mt is private to our process.  To do a swapbuffers,
-    * we have to downsample out of mt into singlesample_mt.  For depth and
-    * stencil buffers, the singlesample_mt is also private, and since we don't
-    * expect to need to do resolves (except if someone does a glReadPixels()
-    * or glCopyTexImage()), we just temporarily allocate singlesample_mt when
-    * asked to map the renderbuffer.
-    */
-   struct brw_mipmap_tree *singlesample_mt;
-
-   /* Gen < 6 doesn't have a layer specifier for render targets or depth.
-    * The driver needs to manually offset surfaces to the correct
-    * level/layer.  There are, however, alignment restrictions to respect as
-    * well, and in some cases the only option is to use a temporary
-    * single-slice surface which the driver copies after rendering to the
-    * full miptree.
-    *
-    * See brw_renderbuffer_move_to_temp().
-    */
-   struct brw_mipmap_tree *align_wa_mt;
-
-   /**
-    * \name Miptree view
-    * \{
-    *
-    * Multiple renderbuffers may simultaneously wrap a single texture and each
-    * provide a different view into that texture.  The fields below indicate
-    * which miptree slice is wrapped by this renderbuffer.  The fields' values
-    * are consistent with the 'level' and 'layer' parameters of
-    * glFramebufferTextureLayer().
-    *
-    * For renderbuffers not created with glFramebufferTexture*(), mt_level and
-    * mt_layer are 0.
-    */
-   unsigned int mt_level;
-   unsigned int mt_layer;
-
-   /* The number of attached logical layers.
*/
-   unsigned int layer_count;
-   /** \} */
-
-   GLuint draw_x, draw_y; /**< Offset of drawing within the region */
-
-   /**
-    * Set to true at every draw call, to indicate if a window-system
-    * renderbuffer needs to be downsampled before using singlesample_mt.
-    */
-   bool need_downsample;
-
-   /**
-    * Set to true when doing a brw_renderbuffer_map()/unmap() that requires
-    * an upsample at the end.
-    */
-   bool need_map_upsample;
-
-   /**
-    * Set to true if singlesample_mt is temporary storage that persists only
-    * for the duration of a mapping.
-    */
-   bool singlesample_mt_is_tmp;
-
-   /**
-    * Set to true when the application specifically asked for an sRGB visual.
-    */
-   bool need_srgb;
-};
-
-
-/**
- * gl_renderbuffer is a base class which we subclass.  The Class field
- * is used for simple run-time type checking.
- */
-#define INTEL_RB_CLASS 0x12345678
-
-
-/**
- * Return a gl_renderbuffer pointer cast to brw_renderbuffer.
- * NULL will be returned if the rb isn't really a brw_renderbuffer.
- * This is determined by checking the ClassID.
- */
-static inline struct brw_renderbuffer *
-brw_renderbuffer(struct gl_renderbuffer *rb)
-{
-   struct brw_renderbuffer *irb = (struct brw_renderbuffer *) rb;
-   if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS)
-      return irb;
-   else
-      return NULL;
-}
-
-static inline struct brw_mipmap_tree *
-brw_renderbuffer_get_mt(struct brw_renderbuffer *irb)
-{
-   if (!irb)
-      return NULL;
-
-   return (irb->align_wa_mt) ? irb->align_wa_mt : irb->mt;
-}
-
-/**
- * \brief Return the framebuffer attachment specified by attIndex.
- *
- * If the framebuffer lacks the specified attachment, then return null.
- *
- * If the attached renderbuffer is a wrapper, then return the wrapped
- * renderbuffer.
- */
-static inline struct brw_renderbuffer *
-brw_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
-{
-   struct gl_renderbuffer *rb;
-
-   assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));
-
-   rb = fb->Attachment[attIndex].Renderbuffer;
-   if (!rb)
-      return NULL;
-
-   return brw_renderbuffer(rb);
-}
-
-
-static inline mesa_format
-brw_rb_format(const struct brw_renderbuffer *rb)
-{
-   return rb->Base.Base.Format;
-}
-
-extern struct brw_renderbuffer *
-brw_create_winsys_renderbuffer(struct brw_screen *screen,
-                               mesa_format format, unsigned num_samples);
-
-struct brw_renderbuffer *
-brw_create_private_renderbuffer(struct brw_screen *screen,
-                                mesa_format format, unsigned num_samples);
-
-struct gl_renderbuffer*
-brw_create_wrapped_renderbuffer(struct gl_context *ctx,
-                                int width, int height,
-                                mesa_format format);
-
-extern void
-brw_fbo_init(struct brw_context *brw);
-
-void
-brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb);
-
-static inline uint32_t
-brw_renderbuffer_get_tile_offsets(struct brw_renderbuffer *irb,
-                                  uint32_t *tile_x,
-                                  uint32_t *tile_y)
-{
-   if (irb->align_wa_mt) {
-      *tile_x = 0;
-      *tile_y = 0;
-      return 0;
-   }
-
-   return brw_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
-                                       tile_x, tile_y);
-}
-
-bool
-brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb);
-
-
-void brw_renderbuffer_move_to_temp(struct brw_context *brw,
-                                   struct brw_renderbuffer *irb,
-                                   bool invalidate);
-
-void
-brw_renderbuffer_downsample(struct brw_context *brw,
-                            struct brw_renderbuffer *irb);
-
-void
-brw_renderbuffer_upsample(struct brw_context *brw,
-                          struct brw_renderbuffer *irb);
-
-void brw_cache_sets_clear(struct brw_context *brw);
-void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
-void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
-                                enum isl_format format,
-                                enum isl_aux_usage aux_usage);
-void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
-void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
-                             enum isl_format format,
-                             enum isl_aux_usage aux_usage);
-void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);
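-
-/* Editor's note: a minimal usage sketch, not part of the original driver.
- * It shows how the cache-tracking helpers above are meant to be driven when
- * a BO is rendered to and then sampled within the same batch; the function
- * name and call site are hypothetical, only the brw_cache_*() and
- * brw_*_cache_add_bo() entry points are real.
- */
-static inline void
-brw_example_render_then_sample(struct brw_context *brw, struct brw_bo *bo,
-                               enum isl_format format,
-                               enum isl_aux_usage aux_usage)
-{
-   /* Before rendering to 'bo': flush if it is in the depth cache, or if it
-    * sits in the render cache with a different (format, aux usage) pair,
-    * then record the pair it is about to be rendered with.
-    */
-   brw_cache_flush_for_render(brw, bo, format, aux_usage);
-   brw_render_cache_add_bo(brw, bo, format, aux_usage);
-
-   /* ... draws targeting 'bo' are emitted here ... */
-
-   /* Before sampling 'bo' in the same batch: the texture cache is not
-    * coherent with the render cache, so this emits the required flushes
-    * and clears the tracking sets.
-    */
-   brw_cache_flush_for_read(brw, bo);
-}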
-
-unsigned
-brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BRW_FBO_H */
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c
deleted file mode 100644
index 93bbc9e..0000000
--- a/src/mesa/drivers/dri/i965/brw_ff_gs.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/macros.h" -#include "main/enums.h" -#include "main/transformfeedback.h" - -#include "brw_batch.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_ff_gs.h" -#include "util/ralloc.h" - -static void -compile_ff_gs_prog(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) -{ - const GLuint *program; - void *mem_ctx; - GLuint program_size; - - mem_ctx = ralloc_context(NULL); - - struct brw_ff_gs_prog_data prog_data; - program = brw_compile_ff_gs_prog(brw->screen->compiler, mem_ctx, key, - &prog_data, - &brw_vue_prog_data(brw->vs.base.prog_data)->vue_map, - &program_size); - - brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); - ralloc_free(mem_ctx); -} - -static bool -brw_ff_gs_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_LIGHT, - BRW_NEW_PRIMITIVE | - BRW_NEW_TRANSFORM_FEEDBACK | - BRW_NEW_VS_PROG_DATA); -} - -static void -brw_ff_gs_populate_key(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - static const unsigned swizzle_for_offset[4] = { - BRW_SWIZZLE4(0, 1, 2, 3), - BRW_SWIZZLE4(1, 2, 3, 3), - BRW_SWIZZLE4(2, 3, 3, 3), - BRW_SWIZZLE4(3, 3, 3, 3) - }; - - struct gl_context *ctx = &brw->ctx; - - assert(devinfo->ver < 7); - - memset(key, 0, sizeof(*key)); - - /* BRW_NEW_VS_PROG_DATA (part of VUE map) */ - key->attrs = brw_vue_prog_data(brw->vs.base.prog_data)->vue_map.slots_valid; - - /* BRW_NEW_PRIMITIVE */ - key->primitive = brw->primitive; - - /* _NEW_LIGHT */ - key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) { - /* Provide consistent primitive order with brw_set_prim's - * optimization of single quads to trifans. - */ - key->pv_first = true; - } - - if (devinfo->ver == 6) { - /* On Gfx6, GS is used for transform feedback. */ - /* BRW_NEW_TRANSFORM_FEEDBACK */ - if (_mesa_is_xfb_active_and_unpaused(ctx)) { - const struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - const struct gl_transform_feedback_info *linked_xfb_info = - prog->sh.LinkedTransformFeedback; - int i; - - /* Make sure that the VUE slots won't overflow the unsigned chars in - * key->transform_feedback_bindings[]. - */ - STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); - - /* Make sure that we don't need more binding table entries than we've - * set aside for use in transform feedback. (We shouldn't, since we - * set aside enough binding table entries to have one per component). - */ - assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); - - key->need_gs_prog = true; - key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; - for (i = 0; i < key->num_transform_feedback_bindings; ++i) { - key->transform_feedback_bindings[i] = - linked_xfb_info->Outputs[i].OutputRegister; - key->transform_feedback_swizzles[i] = - swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; - } - } - } else { - /* Pre-gfx6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP - * into simpler primitives. 
- */
-      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
-                           brw->primitive == _3DPRIM_QUADSTRIP ||
-                           brw->primitive == _3DPRIM_LINELOOP);
-   }
-}
-
-/* Upload the fixed-function GS program needed by the current primitive and
- * transform feedback state, compiling it if the program cache doesn't
- * already hold it.
- */
-void
-brw_upload_ff_gs_prog(struct brw_context *brw)
-{
-   struct brw_ff_gs_prog_key key;
-
-   if (!brw_ff_gs_state_dirty(brw))
-      return;
-
-   /* Populate the key:
-    */
-   brw_ff_gs_populate_key(brw, &key);
-
-   if (brw->ff_gs.prog_active != key.need_gs_prog) {
-      brw->ctx.NewDriverState |= BRW_NEW_FF_GS_PROG_DATA;
-      brw->ff_gs.prog_active = key.need_gs_prog;
-   }
-
-   if (brw->ff_gs.prog_active) {
-      if (!brw_search_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &key,
-                            sizeof(key), &brw->ff_gs.prog_offset,
-                            &brw->ff_gs.prog_data, true)) {
-         compile_ff_gs_prog(brw, &key);
-      }
-   }
-}
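-
-/* Editor's note (not in the original file): program-cache keys like the one
- * above are hashed and compared as raw bytes, which is why
- * brw_ff_gs_populate_key() starts with memset(key, 0, sizeof(*key)) --
- * uninitialized padding bytes would make two logically identical keys miss
- * each other in brw_search_cache() and force a redundant recompile.
- */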
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.h b/src/mesa/drivers/dri/i965/brw_ff_gs.h
deleted file mode 100644
index e35c9d3..0000000
--- a/src/mesa/drivers/dri/i965/brw_ff_gs.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell
-  */
-
-
-#ifndef BRW_GS_H
-#define BRW_GS_H
-
-#include "brw_context.h"
-#include "compiler/brw_eu.h"
-
-void
-brw_upload_ff_gs_prog(struct brw_context *brw);
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/brw_formatquery.c b/src/mesa/drivers/dri/i965/brw_formatquery.c
deleted file mode 100644
index 67d603f..0000000
--- a/src/mesa/drivers/dri/i965/brw_formatquery.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "main/context.h"
-#include "main/formatquery.h"
-#include "main/glformats.h"
-
-static size_t
-brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
-                             GLenum internalFormat, int samples[16])
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   (void) target;
-   (void) internalFormat;
-
-   switch (devinfo->ver) {
-   case 11:
-   case 10:
-   case 9:
-      samples[0] = 16;
-      samples[1] = 8;
-      samples[2] = 4;
-      samples[3] = 2;
-      return 4;
-
-   case 8:
-      samples[0] = 8;
-      samples[1] = 4;
-      samples[2] = 2;
-      return 3;
-
-   case 7:
-      if (internalFormat == GL_RGBA32F && _mesa_is_gles(ctx)) {
-         /* For GLES, we are allowed to return a smaller number of samples for
-          * GL_RGBA32F.  See OpenGLES 3.2 spec, section 20.3.1 Internal Format
-          * Query Parameters, under SAMPLES:
-          *
-          *   "A value less than or equal to the value of MAX_SAMPLES, if
-          *    internalformat is RGBA16F, R32F, RG32F, or RGBA32F."
-          *
-          * In brw_render_target_supported, we prevent formats with a size
-          * greater than 8 bytes from using 8x MSAA on gfx7.
-          */
-         samples[0] = 4;
-         return 1;
-      } else {
-         samples[0] = 8;
-         samples[1] = 4;
-         return 2;
-      }
-
-   case 6:
-      samples[0] = 4;
-      return 1;
-
-   default:
-      assert(devinfo->ver < 6);
-      samples[0] = 1;
-      return 1;
-   }
-}
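-
-/* Editor's illustration (hypothetical caller, not part of the original
- * driver): what the helper above produces.  On a Gfx8 part it fills the
- * first three slots and returns 3, so GL_NUM_SAMPLE_COUNTS reports 3 and
- * GL_SAMPLES returns the same counts in descending order.
- */
-static inline void
-brw_example_msaa_counts(struct gl_context *ctx)
-{
-   int counts[16];  /* callers always provide at least 16 elements */
-   size_t n = brw_query_samples_for_format(ctx, GL_TEXTURE_2D_MULTISAMPLE,
-                                           GL_RGBA8, counts);
-   /* On Gfx8: n == 3 and counts[0..2] == { 8, 4, 2 }. */
-   (void) n;
-}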
-
-void
-brw_query_internal_format(struct gl_context *ctx, GLenum target,
-                          GLenum internalFormat, GLenum pname, GLint *params)
-{
-   /* The Mesa layer gives us a temporary params buffer that is guaranteed
-    * to be non-NULL, and have at least 16 elements.
-    */
-   assert(params != NULL);
-
-   switch (pname) {
-   case GL_SAMPLES:
-      brw_query_samples_for_format(ctx, target, internalFormat, params);
-      break;
-
-   case GL_NUM_SAMPLE_COUNTS: {
-      size_t num_samples;
-      GLint dummy_buffer[16];
-
-      num_samples = brw_query_samples_for_format(ctx, target, internalFormat,
-                                                 dummy_buffer);
-      params[0] = (GLint) num_samples;
-      break;
-   }
-
-   default:
-      /* By default, we call the driver hook's fallback function from the
-       * frontend, which has a generic implementation for all pnames.
-       */
-      _mesa_query_internal_format_default(ctx, target, internalFormat, pname,
-                                          params);
-      break;
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c b/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
deleted file mode 100644
index e533f84..0000000
--- a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mipmap.h"
-#include "main/teximage.h"
-#include "brw_blorp.h"
-#include "brw_context.h"
-#include "brw_tex.h"
-#include "drivers/common/meta.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLORP
-
-
-/**
- * The GenerateMipmap() driver hook.
- */
-void
-brw_generate_mipmap(struct gl_context *ctx, GLenum target,
-                    struct gl_texture_object *tex_obj)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_texture_object *intel_obj = brw_texture_object(tex_obj);
-   const unsigned base_level = tex_obj->Attrib.BaseLevel;
-   unsigned last_level, first_layer, last_layer;
-
-   /* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
-   if (devinfo->ver <= 5 &&
-       (tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
-        tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {
-      _mesa_meta_GenerateMipmap(ctx, target, tex_obj);
-      return;
-   }
-
-   /* Find the expected last mipmap level to generate. */
-   last_level = _mesa_compute_num_levels(ctx, tex_obj, target) - 1;
-
-   if (last_level == 0)
-      return;
-
-   /* The texture isn't in a "complete" state yet so set the expected
-    * last_level here; we're not going through normal texture validation.
-    */
-   intel_obj->_MaxLevel = last_level;
-
-   if (!tex_obj->Immutable) {
-      _mesa_prepare_mipmap_levels(ctx, tex_obj, base_level, last_level);
-
-      /* At this point, memory for all the texture levels has been
-       * allocated.  However, the base level image may be in one resource
-       * while the subsequent/smaller levels may be in another resource.
-       * Finalizing the texture will copy the base images from the former
-       * resource to the latter.
-       *
-       * After this, we'll have all mipmap levels in one resource.
- */ - brw_finalize_mipmap_tree(brw, tex_obj); - } - - struct brw_mipmap_tree *mt = intel_obj->mt; - if (!mt) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "mipmap generation"); - return; - } - - const mesa_format format = intel_obj->_Format; - - /* Fall back to the CPU for non-renderable cases. - * - * TODO: 3D textures require blending data from multiple slices, - * which means we need custom shaders. For now, fall back. - */ - if (!brw->mesa_format_supports_render[format] || target == GL_TEXTURE_3D) { - _mesa_generate_mipmap(ctx, target, tex_obj); - return; - } - - const struct isl_extent4d *base_size = &mt->surf.logical_level0_px; - - if (mt->target == GL_TEXTURE_CUBE_MAP) { - first_layer = _mesa_tex_target_to_face(target); - last_layer = first_layer; - } else { - first_layer = 0; - last_layer = base_size->array_len - 1; - } - - /* The GL_EXT_texture_sRGB_decode extension's issues section says: - * - * "10) How is mipmap generation of sRGB textures affected by the - * TEXTURE_SRGB_DECODE_EXT parameter? - * - * RESOLVED: When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT - * for an sRGB texture, mipmap generation should decode sRGB texels - * to a linear RGB color space, perform downsampling, then encode - * back to an sRGB color space. (Issue 24 in the EXT_texture_sRGB - * specification provides a rationale for why.) When the parameter - * is SKIP_DECODE_EXT instead, mipmap generation skips the encode - * and decode steps during mipmap generation. By skipping the - * encode and decode steps, sRGB mipmap generation should match - * the mipmap generation for a non-sRGB texture." - */ - bool do_srgb = tex_obj->Sampler.Attrib.sRGBDecode == GL_DECODE_EXT; - - for (unsigned dst_level = base_level + 1; - dst_level <= last_level; - dst_level++) { - - const unsigned src_level = dst_level - 1; - - for (unsigned layer = first_layer; layer <= last_layer; layer++) { - brw_blorp_blit_miptrees(brw, mt, src_level, layer, format, - SWIZZLE_XYZW, mt, dst_level, layer, format, - 0, 0, - minify(base_size->width, src_level), - minify(base_size->height, src_level), - 0, 0, - minify(base_size->width, dst_level), - minify(base_size->height, dst_level), - GL_LINEAR, false, false, - do_srgb, do_srgb); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c deleted file mode 100644 index b3acdec..0000000 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_gs.c
- *
- * State atom for client-programmable geometry shaders, and support code.
- */
-
-#include "brw_gs.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_ff_gs.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/ir_uniform.h"
-
-static void
-assign_gs_binding_table_offsets(const struct intel_device_info *devinfo,
-                                const struct gl_program *prog,
-                                struct brw_gs_prog_data *prog_data)
-{
-   /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
-    * feedback surfaces.
-    */
-   uint32_t reserved = devinfo->ver == 6 ? BRW_MAX_SOL_BINDINGS : 0;
-
-   brw_assign_common_binding_table_offsets(devinfo, prog,
-                                           &prog_data->base.base, reserved);
-}
-
-static void
-brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info,
-                   struct brw_gs_prog_data *gs_prog_data)
-{
-   static const unsigned swizzle_for_offset[4] = {
-      BRW_SWIZZLE4(0, 1, 2, 3),
-      BRW_SWIZZLE4(1, 2, 3, 3),
-      BRW_SWIZZLE4(2, 3, 3, 3),
-      BRW_SWIZZLE4(3, 3, 3, 3)
-   };
-
-   int i;
-
-   /* Make sure that the VUE slots won't overflow the unsigned chars in
-    * prog_data->transform_feedback_bindings[].
-    */
-   STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
-
-   /* Make sure that we don't need more binding table entries than we've
-    * set aside for use in transform feedback.  (We shouldn't, since we
-    * set aside enough binding table entries to have one per component).
-    */
-   assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
-
-   gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
-   for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
-      gs_prog_data->transform_feedback_bindings[i] =
-         linked_xfb_info->Outputs[i].OutputRegister;
-      gs_prog_data->transform_feedback_swizzles[i] =
-         swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
-   }
-}
-
-static bool
-brw_codegen_gs_prog(struct brw_context *brw,
-                    struct brw_program *gp,
-                    struct brw_gs_prog_key *key)
-{
-   struct brw_compiler *compiler = brw->screen->compiler;
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_stage_state *stage_state = &brw->gs.base;
-   struct brw_gs_prog_data prog_data;
-   bool start_busy = false;
-   double start_time = 0;
-
-   memset(&prog_data, 0, sizeof(prog_data));
-
-   void *mem_ctx = ralloc_context(NULL);
-
-   nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir);
-
-   assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
-
-   brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program,
-                               &prog_data.base.base,
-                               compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
-   if (brw->can_push_ubos) {
-      brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
-                                 prog_data.base.base.ubo_ranges);
-   }
-
-   uint64_t outputs_written = nir->info.outputs_written;
-
-   brw_compute_vue_map(devinfo,
-                       &prog_data.base.vue_map, outputs_written,
-                       gp->program.info.separate_shader, 1);
-
-   if (devinfo->ver == 6)
-      brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback,
-                         &prog_data);
-
-   int st_index = -1;
-   if (INTEL_DEBUG(DEBUG_SHADER_TIME))
-      st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true);
-
-   if (unlikely(brw->perf_debug)) {
-      start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
-      start_time
= get_time(); - } - - char *error_str; - const unsigned *program = - brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, nir, st_index, - NULL, &error_str); - if (program == NULL) { - ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); - _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (gp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_GEOMETRY, gp->program.Id, - &key->base); - } - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("GS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - gp->compiled_once = true; - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, stage_state, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, - key, sizeof(*key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &stage_state->prog_offset, &brw->gs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -static bool -brw_gs_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_TRANSFORM_FEEDBACK); -} - -void -brw_gs_populate_key(struct brw_context *brw, - struct brw_gs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_program *gp = - (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; - - memset(key, 0, sizeof(*key)); - - brw_populate_base_prog_key(ctx, gp, &key->base); -} - -void -brw_upload_gs_prog(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->gs.base; - struct brw_gs_prog_key key; - /* BRW_NEW_GEOMETRY_PROGRAM */ - struct brw_program *gp = - (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; - - if (!brw_gs_state_dirty(brw)) - return; - - brw_gs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key), - &stage_state->prog_offset, &brw->gs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_GEOMETRY)) - return; - - gp = (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; - gp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_gs_prog(brw, gp, &key); - assert(success); -} - -void -brw_gs_populate_default_key(const struct brw_compiler *compiler, - struct brw_gs_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, brw_program(prog), - &key->base); -} - -bool -brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_gs_prog_key key; - uint32_t old_prog_offset = brw->gs.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data; - bool success; - - struct brw_program *bgp = brw_program(prog); - - brw_gs_populate_default_key(brw->screen->compiler, &key, prog); - - success = brw_codegen_gs_prog(brw, bgp, &key); - - brw->gs.base.prog_offset = old_prog_offset; - brw->gs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h deleted file mode 100644 index 
7dab548..0000000
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_GS_H
-#define BRW_VEC4_GS_H
-
-#include <stdbool.h>
-
-#include "brw_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct gl_shader_program;
-
-void
-brw_upload_gs_prog(struct brw_context *brw);
-
-void
-brw_gs_populate_key(struct brw_context *brw,
-                    struct brw_gs_prog_key *key);
-void
-brw_gs_populate_default_key(const struct brw_compiler *compiler,
-                            struct brw_gs_prog_key *key,
-                            struct gl_program *prog);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* BRW_VEC4_GS_H */
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
deleted file mode 100644
index 6f2629e..0000000
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-
-
-/* Creates a new GS constant buffer reflecting the current GS program's
- * constants, if needed by the GS program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_gs_pull_constants(struct brw_context *brw)
-{
-   struct brw_stage_state *stage_state = &brw->gs.base;
-
-   /* BRW_NEW_GEOMETRY_PROGRAM */
-   struct brw_program *gp =
-      (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
-
-   if (!gp)
-      return;
-
-   /* BRW_NEW_GS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
-   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
-   /* _NEW_PROGRAM_CONSTANTS */
-   brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program,
-                             stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_gs_pull_constants = {
-   .dirty = {
-      .mesa = _NEW_PROGRAM_CONSTANTS,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_GS_PROG_DATA,
-   },
-   .emit = brw_upload_gs_pull_constants,
-};
-
-static void
-brw_upload_gs_ubo_surfaces(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_PROGRAM */
-   struct gl_program *prog =
-      ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
-
-   /* BRW_NEW_GS_PROG_DATA */
-   struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
-   brw_upload_ubo_surfaces(brw, prog, &brw->gs.base, prog_data);
-}
-
-const struct brw_tracked_state brw_gs_ubo_surfaces = {
-   .dirty = {
-      .mesa = _NEW_PROGRAM,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_UNIFORM_BUFFER,
-   },
-   .emit = brw_upload_gs_ubo_surfaces,
-};
-
-static void
-brw_upload_gs_image_surfaces(struct brw_context *brw)
-{
-   /* BRW_NEW_GEOMETRY_PROGRAM */
-   const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
-
-   if (gp) {
-      /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
-      brw_upload_image_surfaces(brw, gp, &brw->gs.base,
-                                brw->gs.base.prog_data);
-   }
-}
-
-const struct brw_tracked_state brw_gs_image_surfaces = {
-   .dirty = {
-      .mesa = _NEW_TEXTURE,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_AUX_STATE |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_IMAGE_UNITS,
-   },
-   .emit = brw_upload_gs_image_surfaces,
-};
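-
-/* Editor's sketch (illustrative, not part of the original file): the shape
- * of the tracked-state pattern used by the three atoms above.  An atom
- * declares which core-GL (.mesa) and driver-internal (.brw) dirty bits it
- * depends on, and the driver's state-upload loop invokes .emit whenever one
- * of those bits is flagged.  'example_emit' and 'example_atom' are
- * hypothetical names.
- */
-static void
-example_emit(struct brw_context *brw)
-{
-   /* Re-emit whatever packets depend on the watched state here. */
-   (void) brw;
-}
-
-static const struct brw_tracked_state example_atom = {
-   .dirty = {
-      .mesa = _NEW_TEXTURE,             /* core-GL state bits */
-      .brw = BRW_NEW_BATCH |            /* driver-internal state bits */
-             BRW_NEW_GEOMETRY_PROGRAM,
-   },
-   .emit = example_emit,
-};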
diff --git a/src/mesa/drivers/dri/i965/brw_image.h b/src/mesa/drivers/dri/i965/brw_image.h
deleted file mode 100644
index 6a976eb..0000000
--- a/src/mesa/drivers/dri/i965/brw_image.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_IMAGE_H
-#define BRW_IMAGE_H
-
-/** @file brw_image.h
- *
- * Structure definitions and prototypes for __DRIimage, the driver-private
- * structure backing EGLImage or a drawable in DRI3.
- *
- * The __DRIimage is passed around the loader code (src/glx and src/egl), but
- * it's opaque to that code and may only be accessed by loader extensions
- * (mostly located in brw_screen.c).
- */
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "main/mtypes.h"
-#include "brw_bufmgr.h"
-#include <GL/internal/dri_interface.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * Used with images created with image_from_names
- * to help support planar images.
- */
-struct brw_image_format {
-   int fourcc;
-   int components;
-   int nplanes;
-   struct {
-      int buffer_index;
-      int width_shift;
-      int height_shift;
-      uint32_t dri_format;
-      int cpp;
-   } planes[3];
-   float scaling_factor;
-};
-
-struct __DRIimageRec {
-   struct brw_screen *screen;
-   struct brw_bo *bo;
-   uint32_t pitch; /**< in bytes */
-   GLenum internal_format;
-   uint32_t dri_format;
-   GLuint format; /**< mesa_format or mesa_array_format */
-   uint64_t modifier; /**< fb modifier (fourcc) */
-   uint32_t offset;
-
-   /*
-    * Need to save these here between calls to
-    * image_from_names and calls to image_from_planar.
-    */
-   uint32_t strides[3];
-   uint32_t offsets[3];
-   const struct brw_image_format *planar_format;
-
-   /* particular miptree level */
-   GLuint width;
-   GLuint height;
-   GLuint tile_x;
-   GLuint tile_y;
-   bool has_depthstencil;
-   bool imported_dmabuf;
-
-   /** Offset of the auxiliary compression surface in the bo. */
-   uint32_t aux_offset;
-
-   /** Pitch of the auxiliary compression surface. */
-   uint32_t aux_pitch;
-
-   /** Total size in bytes of the auxiliary compression surface. */
-   uint32_t aux_size;
-
-   /**
-    * Provided by EGL_EXT_image_dma_buf_import.
-    * \{
-    */
-   enum __DRIYUVColorSpace yuv_color_space;
-   enum __DRISampleRange sample_range;
-   enum __DRIChromaSiting horizontal_siting;
-   enum __DRIChromaSiting vertical_siting;
-   /* \} */
-
-   __DRIscreen *driScrnPriv;
-
-   void *loader_private;
-};
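-
-/* Editor's sketch (illustrative, not part of the original header): the
- * intended use of the per-plane width_shift/height_shift fields above.
- * Each plane stores the image downscaled by a power of two per axis; an
- * NV12-style entry, for instance, would use shifts {0,0} for the Y plane
- * and {1,1} for the interleaved CbCr plane.  The helper name is
- * hypothetical; the real fourcc tables live in the screen code.
- */
-static inline void
-brw_example_plane_dims(const struct brw_image_format *f, int plane,
-                       int width, int height,
-                       int *plane_width, int *plane_height)
-{
-   *plane_width = width >> f->planes[plane].width_shift;
-   *plane_height = height >> f->planes[plane].height_shift;
-}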
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
deleted file mode 100644
index 974543e..0000000
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/gl_nir.h"
-#include "compiler/glsl/gl_nir_linker.h"
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/ir_optimization.h"
-#include "compiler/glsl/program.h"
-#include "compiler/nir/nir_serialize.h"
-#include "program/program.h"
-#include "main/glspirv.h"
-#include "main/mtypes.h"
-#include "main/shaderapi.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-
-/**
- * Performs a compile of the shader stages even when we don't know
- * what non-orthogonal state will be set, in the hope that it reflects
- * the eventual NOS used, and thus allows us to produce link failures.
- */
-static bool
-brw_shader_precompile(struct gl_context *ctx,
-                      struct gl_shader_program *sh_prog)
-{
-   struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
-   struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
-   struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
-   struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-   struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
-   if (fs && !brw_fs_precompile(ctx, fs->Program))
-      return false;
-
-   if (gs && !brw_gs_precompile(ctx, gs->Program))
-      return false;
-
-   if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
-      return false;
-
-   if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
-      return false;
-
-   if (vs && !brw_vs_precompile(ctx, vs->Program))
-      return false;
-
-   if (cs && !brw_cs_precompile(ctx, cs->Program))
-      return false;
-
-   return true;
-}
-
-static void
-brw_lower_packing_builtins(struct brw_context *brw,
-                           exec_list *ir)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   /* Gens < 7 don't have instructions to convert to or from half-precision,
-    * and Gens < 6 don't expose that functionality.
-    */
-   if (devinfo->ver != 6)
-      return;
-
-   lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
-}
-
-static void
-process_glsl_ir(struct brw_context *brw,
-                struct gl_shader_program *shader_prog,
-                struct gl_linked_shader *shader)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct gl_context *ctx = &brw->ctx;
-
-   /* Temporary memory context for any new IR. */
-   void *mem_ctx = ralloc_context(NULL);
-
-   ralloc_adopt(mem_ctx, shader->ir);
-
-   if (shader->Stage == MESA_SHADER_FRAGMENT) {
-      lower_blend_equation_advanced(
-         shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
-   }
-
-   /* lower_packing_builtins() inserts arithmetic instructions, so it
-    * must precede lower_instructions().
-    */
-   brw_lower_packing_builtins(brw, shader->ir);
-   do_mat_op_to_vec(shader->ir);
-
-   unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
-                                     SUB_TO_ADD_NEG |
-                                     EXP_TO_EXP2 |
-                                     LOG_TO_LOG2 |
-                                     DFREXP_DLDEXP_TO_ARITH);
-   if (devinfo->ver < 7) {
-      instructions_to_lower |= BIT_COUNT_TO_MATH |
-                               EXTRACT_TO_SHIFTS |
-                               INSERT_TO_SHIFTS |
-                               REVERSE_TO_SHIFTS;
-   }
-
-   lower_instructions(shader->ir, instructions_to_lower);
-
-   /* Pre-gfx6 HW can only nest if-statements 16 deep.  Beyond this,
-    * if-statements need to be flattened.
- */ - if (devinfo->ver < 6) - lower_if_to_cond_assign(shader->Stage, shader->ir, 16); - - do_vec_index_to_cond_assign(shader->ir); - lower_vector_insert(shader->ir, true); - lower_offset_arrays(shader->ir); - lower_quadop_vector(shader->ir, false); - - validate_ir_tree(shader->ir); - - /* Now that we've finished altering the linked IR, reparent any live IR back - * to the permanent memory context, and free the temporary one (discarding any - * junk we optimized away). - */ - reparent_ir(shader->ir, shader->ir); - ralloc_free(mem_ctx); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "\n"); - if (shader->ir) { - fprintf(stderr, "GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->Stage), - shader_prog->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - } else { - fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be " - "from cache)\n", _mesa_shader_stage_to_string(shader->Stage), - shader_prog->Name); - } - fprintf(stderr, "\n"); - } -} - -static void -unify_interfaces(struct shader_info **infos) -{ - struct shader_info *prev_info = NULL; - - for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { - if (!infos[i]) - continue; - - if (prev_info) { - prev_info->outputs_written |= infos[i]->inputs_read & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - infos[i]->inputs_read |= prev_info->outputs_written & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - - prev_info->patch_outputs_written |= infos[i]->patch_inputs_read; - infos[i]->patch_inputs_read |= prev_info->patch_outputs_written; - } - prev_info = infos[i]; - } -} - -static void -update_xfb_info(struct gl_transform_feedback_info *xfb_info, - struct shader_info *info) -{ - if (!xfb_info) - return; - - for (unsigned i = 0; i < xfb_info->NumOutputs; i++) { - struct gl_transform_feedback_output *output = &xfb_info->Outputs[i]; - - /* The VUE header contains three scalar fields packed together: - * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w - * - gl_Layer is stored in VARYING_SLOT_PSIZ.y - * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z - */ - switch (output->OutputRegister) { - case VARYING_SLOT_LAYER: - assert(output->NumComponents == 1); - output->OutputRegister = VARYING_SLOT_PSIZ; - output->ComponentOffset = 1; - break; - case VARYING_SLOT_VIEWPORT: - assert(output->NumComponents == 1); - output->OutputRegister = VARYING_SLOT_PSIZ; - output->ComponentOffset = 2; - break; - case VARYING_SLOT_PSIZ: - assert(output->NumComponents == 1); - output->ComponentOffset = 3; - break; - } - - info->outputs_written |= 1ull << output->OutputRegister; - } -} - -extern "C" GLboolean -brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - unsigned int stage; - struct shader_info *infos[MESA_SHADER_STAGES] = { 0, }; - - if (shProg->data->LinkStatus == LINKING_SKIPPED) - return GL_TRUE; - - for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { - struct gl_linked_shader *shader = shProg->_LinkedShaders[stage]; - if (!shader) - continue; - - struct gl_program *prog = shader->Program; - prog->Parameters = _mesa_new_parameter_list(); - - if (!shader->spirv_data) - process_glsl_ir(brw, shProg, shader); - - _mesa_copy_linked_program_data(shProg, shader); - - prog->ShadowSamplers = shader->shadow_samplers; - - bool debug_enabled = - INTEL_DEBUG(intel_debug_flag_for_shader_stage(shader->Stage)); - - 
if (debug_enabled && shader->ir) {
-         fprintf(stderr, "GLSL IR for native %s shader %d:\n",
-                 _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
-         _mesa_print_ir(stderr, shader->ir, NULL);
-         fprintf(stderr, "\n\n");
-      }
-
-      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
-                                 compiler->scalar_stage[stage]);
-   }
-
-   /* TODO: Verify if it's feasible to split up the NIR linking work into a
-    * per-stage part (that fills out information we need for the passes) and
-    * an actual linking part, so that we could fold brw_nir_lower_resources
-    * back into brw_create_nir.
-    */
-
-   /* SPIR-V programs use a NIR linker */
-   if (shProg->data->spirv) {
-      static const gl_nir_linker_options opts = {
-         .fill_parameters = false,
-      };
-      if (!gl_nir_link_spirv(ctx, shProg, &opts))
-         return GL_FALSE;
-   }
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-         continue;
-
-      struct gl_program *prog = shader->Program;
-
-      brw_nir_lower_resources(prog->nir, shProg, prog, &brw->screen->devinfo);
-
-      NIR_PASS_V(prog->nir, brw_nir_lower_gl_images, prog);
-   }
-
-   /* Determine first and last stage. */
-   unsigned first = MESA_SHADER_STAGES;
-   unsigned last = 0;
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (!shProg->_LinkedShaders[i])
-         continue;
-      if (first == MESA_SHADER_STAGES)
-         first = i;
-      last = i;
-   }
-
-   /* Linking the stages in the opposite order (from fragment to vertex)
-    * ensures that inter-shader outputs written to in an earlier stage
-    * are eliminated if they are (transitively) not used in a later
-    * stage.
-    *
-    * TODO: Look into Shadow of Mordor regressions on HSW and enable this for
-    * all platforms.  See: https://bugs.freedesktop.org/show_bug.cgi?id=103537
-    */
-   if (first != last && brw->screen->devinfo.ver >= 8) {
-      int next = last;
-      for (int i = next - 1; i >= 0; i--) {
-         if (shProg->_LinkedShaders[i] == NULL)
-            continue;
-
-         brw_nir_link_shaders(compiler,
-                              shProg->_LinkedShaders[i]->Program->nir,
-                              shProg->_LinkedShaders[next]->Program->nir);
-         next = i;
-      }
-   }
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-         continue;
-
-      struct gl_program *prog = shader->Program;
-
-      _mesa_update_shader_textures_used(shProg, prog);
-
-      brw_shader_gather_info(prog->nir, prog);
-
-      NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false);
-      NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo);
-
-      nir_sweep(prog->nir);
-
-      infos[stage] = &prog->nir->info;
-
-      update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
-
-      /* Make a pass over the IR to add state references for any built-in
-       * uniforms that are used.  This has to be done now (during linking).
-       * Code generation doesn't happen until the first time this shader is
-       * used for rendering.  Waiting until then to generate the parameters is
-       * too late.  At that point, the values for the built-in uniforms won't
-       * get sent to the shader.
-       */
-      nir_foreach_uniform_variable(var, prog->nir) {
-         const nir_state_slot *const slots = var->state_slots;
-         for (unsigned int i = 0; i < var->num_state_slots; i++) {
-            assert(slots != NULL);
-            _mesa_add_state_reference(prog->Parameters, slots[i].tokens);
-         }
-      }
-   }
-
-   /* The linker tries to dead code eliminate unused varying components,
-    * and make sure interfaces match.  But it isn't able to do so in all
-    * cases.  So, explicitly make the interfaces match by OR'ing together
-    * the inputs_read/outputs_written bitfields of adjacent stages.
-    */
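-   /* Editor's illustration (hypothetical VS->FS pair, not from the original
-    * sources): unify_interfaces() above simply ORs the adjacent stages'
-    * bitfields, i.e. effectively
-    *
-    *    vs_info->outputs_written |= fs_info->inputs_read;
-    *    fs_info->inputs_read     |= vs_info->outputs_written;
-    *
-    * so a varying mentioned by only one side ends up marked present on both
-    * (tess-level varyings excepted, as they are masked out there).
-    */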
-   if (!shProg->SeparateShader)
-      unify_interfaces(infos);
-
-   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
-      for (unsigned i = 0; i < shProg->NumShaders; i++) {
-         const struct gl_shader *sh = shProg->Shaders[i];
-         if (!sh)
-            continue;
-
-         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
-                 _mesa_shader_stage_to_string(sh->Stage),
-                 i, shProg->Name);
-         fprintf(stderr, "%s", sh->Source);
-         fprintf(stderr, "\n");
-      }
-   }
-
-   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
-      return GL_FALSE;
-
-   /* SPIR-V programs build their resource list from the linked NIR shaders. */
-   if (!shProg->data->spirv)
-      build_program_resource_list(ctx, shProg, false);
-   else
-      nir_build_program_resource_list(ctx, shProg, true);
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-         continue;
-
-      /* The GLSL IR won't be needed anymore. */
-      ralloc_free(shader->ir);
-      shader->ir = NULL;
-   }
-
-   return GL_TRUE;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c b/src/mesa/drivers/dri/i965/brw_meta_util.c
deleted file mode 100644
index eca367d..0000000
--- a/src/mesa/drivers/dri/i965/brw_meta_util.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_fbo.h"
-#include "brw_meta_util.h"
-#include "brw_state.h"
-#include "main/blend.h"
-#include "main/fbobject.h"
-#include "util/format_srgb.h"
-
-/**
- * Helper function for handling mirror image blits.
- *
- * If coord0 > coord1, swap them and invert the "mirror" boolean.
- */ -static inline void -fixup_mirroring(bool *mirror, float *coord0, float *coord1) -{ - if (*coord0 > *coord1) { - *mirror = !*mirror; - float tmp = *coord0; - *coord0 = *coord1; - *coord1 = tmp; - } -} - -/** - * Compute the number of pixels to clip for each side of a rect - * - * \param x0 The rect's left coordinate - * \param y0 The rect's bottom coordinate - * \param x1 The rect's right coordinate - * \param y1 The rect's top coordinate - * \param min_x The clipping region's left coordinate - * \param min_y The clipping region's bottom coordinate - * \param max_x The clipping region's right coordinate - * \param max_y The clipping region's top coordinate - * \param clipped_x0 The number of pixels to clip from the left side - * \param clipped_y0 The number of pixels to clip from the bottom side - * \param clipped_x1 The number of pixels to clip from the right side - * \param clipped_y1 The number of pixels to clip from the top side - * - * \return false if we clip everything away, true otherwise - */ -static inline bool -compute_pixels_clipped(float x0, float y0, float x1, float y1, - float min_x, float min_y, float max_x, float max_y, - float *clipped_x0, float *clipped_y0, float *clipped_x1, float *clipped_y1) -{ - /* If we are going to clip everything away, stop. */ - if (!(min_x <= max_x && - min_y <= max_y && - x0 <= max_x && - y0 <= max_y && - min_x <= x1 && - min_y <= y1 && - x0 <= x1 && - y0 <= y1)) { - return false; - } - - if (x0 < min_x) - *clipped_x0 = min_x - x0; - else - *clipped_x0 = 0; - if (max_x < x1) - *clipped_x1 = x1 - max_x; - else - *clipped_x1 = 0; - - if (y0 < min_y) - *clipped_y0 = min_y - y0; - else - *clipped_y0 = 0; - if (max_y < y1) - *clipped_y1 = y1 - max_y; - else - *clipped_y1 = 0; - - return true; -} - -/** - * Clips a coordinate (left, right, top or bottom) for the src or dst rect - * (whichever requires the largest clip) and adjusts the coordinate - * for the other rect accordingly. - * - * \param mirror true if mirroring is required - * \param src the source rect coordinate (for example srcX0) - * \param dst0 the dst rect coordinate (for example dstX0) - * \param dst1 the opposite dst rect coordinate (for example dstX1) - * \param clipped_src0 number of pixels to clip from the src coordinate - * \param clipped_dst0 number of pixels to clip from the dst coordinate - * \param clipped_dst1 number of pixels to clip from the opposite dst coordinate - * \param scale the src vs dst scale involved for that coordinate - * \param isLeftOrBottom true if we are clipping the left or bottom sides - * of the rect. - */ -static inline void -clip_coordinates(bool mirror, - float *src, float *dst0, float *dst1, - float clipped_src0, - float clipped_dst0, - float clipped_dst1, - float scale, - bool isLeftOrBottom) -{ - /* When clipping we need to add or subtract pixels from the original - * coordinates depending on whether we are acting on the left/bottom - * or right/top sides of the rect respectively. We assume we have to - * add them in the code below, and multiply by -1 when we should - * subtract. - */ - int mult = isLeftOrBottom ? 
1 : -1; - - if (!mirror) { - if (clipped_src0 >= clipped_dst0 * scale) { - *src += clipped_src0 * mult; - *dst0 += clipped_src0 / scale * mult; - } else { - *dst0 += clipped_dst0 * mult; - *src += clipped_dst0 * scale * mult; - } - } else { - if (clipped_src0 >= clipped_dst1 * scale) { - *src += clipped_src0 * mult; - *dst1 -= clipped_src0 / scale * mult; - } else { - *dst1 -= clipped_dst1 * mult; - *src += clipped_dst1 * scale * mult; - } - } -} - -bool -brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx, - const struct gl_framebuffer *read_fb, - const struct gl_framebuffer *draw_fb, - GLfloat *srcX0, GLfloat *srcY0, - GLfloat *srcX1, GLfloat *srcY1, - GLfloat *dstX0, GLfloat *dstY0, - GLfloat *dstX1, GLfloat *dstY1, - bool *mirror_x, bool *mirror_y) -{ - *mirror_x = false; - *mirror_y = false; - - /* Detect if the blit needs to be mirrored */ - fixup_mirroring(mirror_x, srcX0, srcX1); - fixup_mirroring(mirror_x, dstX0, dstX1); - fixup_mirroring(mirror_y, srcY0, srcY1); - fixup_mirroring(mirror_y, dstY0, dstY1); - - /* Compute number of pixels to clip for each side of both rects. Return - * early if we are going to clip everything away. - */ - float clip_src_x0; - float clip_src_x1; - float clip_src_y0; - float clip_src_y1; - float clip_dst_x0; - float clip_dst_x1; - float clip_dst_y0; - float clip_dst_y1; - - if (!compute_pixels_clipped(*srcX0, *srcY0, *srcX1, *srcY1, - 0, 0, read_fb->Width, read_fb->Height, - &clip_src_x0, &clip_src_y0, &clip_src_x1, &clip_src_y1)) - return true; - - if (!compute_pixels_clipped(*dstX0, *dstY0, *dstX1, *dstY1, - draw_fb->_Xmin, draw_fb->_Ymin, draw_fb->_Xmax, draw_fb->_Ymax, - &clip_dst_x0, &clip_dst_y0, &clip_dst_x1, &clip_dst_y1)) - return true; - - /* When clipping any of the two rects we need to adjust the coordinates in - * the other rect considering the scaling factor involved. To obtain the best - * precision we want to make sure that we only clip once per side to avoid - * accumulating errors due to the scaling adjustment. - * - * For example, if srcX0 and dstX0 need both to be clipped we want to avoid - * the situation where we clip srcX0 first, then adjust dstX0 accordingly - * but then we realize that the resulting dstX0 still needs to be clipped, - * so we clip dstX0 and adjust srcX0 again. Because we are applying scaling - * factors to adjust the coordinates in each clipping pass we lose some - * precision and that can affect the results of the blorp blit operation - * slightly. What we want to do here is detect the rect that we should - * clip first for each side so that when we adjust the other rect we ensure - * the resulting coordinate does not need to be clipped again. - * - * The code below implements this by comparing the number of pixels that - * we need to clip for each side of both rects considering the scales - * involved. For example, clip_src_x0 represents the number of pixels to be - * clipped for the src rect's left side, so if clip_src_x0 = 5, - * clip_dst_x0 = 4 and scaleX = 2 it means that we are clipping more from - * the dst rect so we should clip dstX0 only and adjust srcX0. This is - * because clipping 4 pixels in the dst is equivalent to clipping - * 4 * 2 = 8 > 5 in the src. 
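A minimal standalone C sketch of the ordering rule above, using a hypothetical clip_source_first() helper rather than anything from this driver, reproduces the worked numbers from the comment:

    #include <stdbool.h>
    #include <stdio.h>

    /* Decide which rect to clip for one side: the source needs the larger
     * clip once both amounts are expressed in source pixels
     * (clipped_dst * scale converts destination pixels to source pixels).
     */
    static bool
    clip_source_first(float clipped_src, float clipped_dst, float scale)
    {
       return clipped_src >= clipped_dst * scale;
    }

    int
    main(void)
    {
       /* clip_src_x0 = 5, clip_dst_x0 = 4, scaleX = 2: clipping 4 dst
        * pixels equals 8 src pixels > 5, so clip the dst rect and only
        * adjust the src.
        */
       printf("clip %s first\n",
              clip_source_first(5.0f, 4.0f, 2.0f) ? "src" : "dst");
       return 0;
    }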
- */ - - if (*srcX0 == *srcX1 || *srcY0 == *srcY1 - || *dstX0 == *dstX1 || *dstY0 == *dstY1) - return true; - - float scaleX = (float) (*srcX1 - *srcX0) / (*dstX1 - *dstX0); - float scaleY = (float) (*srcY1 - *srcY0) / (*dstY1 - *dstY0); - - /* Clip left side */ - clip_coordinates(*mirror_x, - srcX0, dstX0, dstX1, - clip_src_x0, clip_dst_x0, clip_dst_x1, - scaleX, true); - - /* Clip right side */ - clip_coordinates(*mirror_x, - srcX1, dstX1, dstX0, - clip_src_x1, clip_dst_x1, clip_dst_x0, - scaleX, false); - - /* Clip bottom side */ - clip_coordinates(*mirror_y, - srcY0, dstY0, dstY1, - clip_src_y0, clip_dst_y0, clip_dst_y1, - scaleY, true); - - /* Clip top side */ - clip_coordinates(*mirror_y, - srcY1, dstY1, dstY0, - clip_src_y1, clip_dst_y1, clip_dst_y0, - scaleY, false); - - /* Account for the fact that in the system framebuffer, the origin is at - * the lower left. - */ - if (read_fb->FlipY) { - GLint tmp = read_fb->Height - *srcY0; - *srcY0 = read_fb->Height - *srcY1; - *srcY1 = tmp; - *mirror_y = !*mirror_y; - } - if (draw_fb->FlipY) { - GLint tmp = draw_fb->Height - *dstY0; - *dstY0 = draw_fb->Height - *dstY1; - *dstY1 = tmp; - *mirror_y = !*mirror_y; - } - - /* Check for invalid bounds - * Can't blit for 0-dimensions - */ - return *srcX0 == *srcX1 || *srcY0 == *srcY1 - || *dstX0 == *dstX1 || *dstY0 == *dstY1; -} - -/** - * Determine if fast color clear supports the given clear color. - * - * Fast color clear can only clear to color values of 1.0 or 0.0. At the - * moment we only support floating point, unorm, and snorm buffers. - */ -bool -brw_is_color_fast_clear_compatible(struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - - /* If we're mapping the render format to a different format than the - * format we use for texturing then it is a bit questionable whether it - * should be possible to use a fast clear. Although we only actually - * render using a renderable format, without the override workaround it - * wouldn't be possible to have a non-renderable surface in a fast clear - * state so the hardware probably legitimately doesn't need to support - * this case. At least on Gfx9 this really does seem to cause problems. - */ - if (devinfo->ver >= 9 && - brw_isl_format_for_mesa_format(mt->format) != - brw->mesa_to_isl_render_format[mt->format]) - return false; - - const mesa_format format = _mesa_get_render_format(ctx, mt->format); - if (_mesa_is_format_integer_color(format)) { - if (devinfo->ver >= 8) { - perf_debug("Integer fast clear not enabled for (%s)", - _mesa_get_format_name(format)); - } - return false; - } - - for (int i = 0; i < 4; i++) { - if (!_mesa_format_has_color_component(format, i)) { - continue; - } - - if (devinfo->ver < 9 && - color->f[i] != 0.0f && color->f[i] != 1.0f) { - return false; - } - } - return true; -} - -/** - * Convert the given color to a bitfield suitable for ORing into DWORD 7 of - * SURFACE_STATE (DWORD 12-15 on SKL+). 
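The per-channel rule that brw_is_color_fast_clear_compatible() applies on pre-Gfx9 hardware condenses to a small predicate. A sketch, with has_component as a stand-in for _mesa_format_has_color_component():

    #include <stdbool.h>
    #include <stdio.h>

    /* Pre-Gfx9: every channel present in the format must be exactly
     * 0.0 or 1.0 for a fast clear to be possible.
     */
    static bool
    fast_clear_color_ok_pre_gfx9(const float color[4],
                                 bool (*has_component)(int chan))
    {
       for (int i = 0; i < 4; i++) {
          if (!has_component(i))
             continue; /* missing channels are overridden anyway */
          if (color[i] != 0.0f && color[i] != 1.0f)
             return false;
       }
       return true;
    }

    static bool rgb_only(int chan) { return chan < 3; } /* an RGBX-like format */

    int
    main(void)
    {
       const float color[4] = { 1.0f, 0.0f, 0.0f, 0.25f };
       /* Alpha is absent from the format, so this color still qualifies. */
       printf("%s\n", fast_clear_color_ok_pre_gfx9(color, rgb_only)
                         ? "fast clear ok" : "no fast clear");
       return 0;
    }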
- */ -union isl_color_value -brw_meta_convert_fast_clear_color(const struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color) -{ - union isl_color_value override_color = { - .u32 = { - color->ui[0], - color->ui[1], - color->ui[2], - color->ui[3], - }, - }; - - /* The sampler doesn't look at the format of the surface when the fast - * clear color is used so we need to implement luminance, intensity and - * missing components manually. - */ - switch (_mesa_get_format_base_format(mt->format)) { - case GL_INTENSITY: - override_color.u32[3] = override_color.u32[0]; - FALLTHROUGH; - case GL_LUMINANCE: - case GL_LUMINANCE_ALPHA: - override_color.u32[1] = override_color.u32[0]; - override_color.u32[2] = override_color.u32[0]; - break; - default: - for (int i = 0; i < 3; i++) { - if (!_mesa_format_has_color_component(mt->format, i)) - override_color.u32[i] = 0; - } - break; - } - - switch (_mesa_get_format_datatype(mt->format)) { - case GL_UNSIGNED_NORMALIZED: - for (int i = 0; i < 4; i++) - override_color.f32[i] = SATURATE(override_color.f32[i]); - break; - - case GL_SIGNED_NORMALIZED: - for (int i = 0; i < 4; i++) - override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f); - break; - - case GL_UNSIGNED_INT: - for (int i = 0; i < 4; i++) { - unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i); - if (bits < 32) { - uint32_t max = (1u << bits) - 1; - override_color.u32[i] = MIN2(override_color.u32[i], max); - } - } - break; - - case GL_INT: - for (int i = 0; i < 4; i++) { - unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i); - if (bits < 32) { - int32_t max = (1 << (bits - 1)) - 1; - int32_t min = -(1 << (bits - 1)); - override_color.i32[i] = CLAMP(override_color.i32[i], min, max); - } - } - break; - - case GL_FLOAT: - if (!_mesa_is_format_signed(mt->format)) { - for (int i = 0; i < 4; i++) - override_color.f32[i] = MAX2(override_color.f32[i], 0.0f); - } - break; - } - - if (!_mesa_format_has_color_component(mt->format, 3)) { - if (_mesa_is_format_integer_color(mt->format)) - override_color.u32[3] = 1; - else - override_color.f32[3] = 1.0f; - } - - /* Handle linear to SRGB conversion */ - if (brw->ctx.Color.sRGBEnabled && - _mesa_get_srgb_format_linear(mt->format) != mt->format) { - for (int i = 0; i < 3; i++) { - override_color.f32[i] = - util_format_linear_to_srgb_float(override_color.f32[i]); - } - } - - return override_color; -} diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.h b/src/mesa/drivers/dri/i965/brw_meta_util.h deleted file mode 100644 index c469490..0000000 --- a/src/mesa/drivers/dri/i965/brw_meta_util.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_META_UTIL_H -#define BRW_META_UTIL_H - -#include <stdbool.h> -#include "main/mtypes.h" -#include "brw_mipmap_tree.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx, - const struct gl_framebuffer *read_fb, - const struct gl_framebuffer *draw_fb, - GLfloat *srcX0, GLfloat *srcY0, - GLfloat *srcX1, GLfloat *srcY1, - GLfloat *dstX0, GLfloat *dstY0, - GLfloat *dstX1, GLfloat *dstY1, - bool *mirror_x, bool *mirror_y); - -union isl_color_value -brw_meta_convert_fast_clear_color(const struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color); - -bool -brw_is_color_fast_clear_compatible(struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color); - -#ifdef __cplusplus -} -#endif - -#endif /* BRW_META_UTIL_H */ diff --git a/src/mesa/drivers/dri/i965/brw_mipmap_tree.c b/src/mesa/drivers/dri/i965/brw_mipmap_tree.c deleted file mode 100644 index 327340a..0000000 --- a/src/mesa/drivers/dri/i965/brw_mipmap_tree.c +++ /dev/null @@ -1,3308 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */ - -#include <GL/gl.h> -#include <GL/internal/dri_interface.h> -#include "drm-uapi/drm_fourcc.h" - -#include "brw_batch.h" -#include "brw_image.h" -#include "brw_mipmap_tree.h" -#include "brw_tex.h" -#include "brw_blit.h" -#include "brw_fbo.h" - -#include "brw_blorp.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "main/enums.h" -#include "main/fbobject.h" -#include "main/formats.h" -#include "main/glformats.h" -#include "main/texcompress_etc.h" -#include "main/teximage.h" -#include "main/streaming-load-memcpy.h" - -#include "util/format_srgb.h" -#include "util/u_memory.h" - -#include "x86/common_x86_asm.h" - -#define FILE_DEBUG_FLAG DEBUG_MIPTREE - -static void *brw_miptree_map_raw(struct brw_context *brw, - struct brw_mipmap_tree *mt, - GLbitfield mode); - -static void brw_miptree_unmap_raw(struct brw_mipmap_tree *mt); - -/** - * Return true if the format that will be used to access the miptree is - * CCS_E-compatible with the miptree's linear/non-sRGB format. - * - * Why use the linear format? Well, although the miptree may be specified with - * an sRGB format, the usage of that color space/format can be toggled. Since - * our HW tends to support more linear formats than sRGB ones, we use this - * format variant to check for CCS_E compatibility. - */ -static bool -format_ccs_e_compat_with_miptree(const struct intel_device_info *devinfo, - const struct brw_mipmap_tree *mt, - enum isl_format access_format) -{ - assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E); - - mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); - enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); - return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format); -} - -/* Determine if CCS_E is supported for a given platform and mesa format. */ -static bool -format_supports_ccs_e(const struct brw_context *brw, mesa_format format) -{ - /* For now compression is only enabled for integer formats even though - * there exist supported floating point formats also. This is a heuristic - * decision based on current public benchmarks. In none of the cases did - * these formats provide any improvement, and a few cases were seen to - * regress. Hence these are left to be enabled in the future when they are - * known to improve things. - */ - if (_mesa_get_format_datatype(format) == GL_FLOAT) - return false; - - /* Many window system buffers are sRGB even if they are never rendered as - * sRGB. For those, we want CCS_E for when sRGBEncode is false. When the - * surface is used as sRGB, we fall back to CCS_D. - */ - mesa_format linear_format = _mesa_get_srgb_format_linear(format); - enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); - return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format); -} - -/** - * Determine the depth format corresponding to a depth+stencil format, - * for separate stencil. - */ -mesa_format -brw_depth_format_for_depthstencil_format(mesa_format format) { - switch (format) { - case MESA_FORMAT_Z24_UNORM_S8_UINT: - return MESA_FORMAT_Z24_UNORM_X8_UINT; - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - return MESA_FORMAT_Z_FLOAT32; - default: - return format; - } -} - -static bool -create_mapping_table(GLenum target, unsigned first_level, unsigned last_level, - unsigned depth0, struct brw_mipmap_level *table) -{ - for (unsigned level = first_level; level <= last_level; level++) { - const unsigned d = - target == GL_TEXTURE_3D ?
minify(depth0, level) : depth0; - - table[level].slice = calloc(d, sizeof(*table[0].slice)); - if (!table[level].slice) - goto unwind; - } - - return true; - -unwind: - for (unsigned level = first_level; level <= last_level; level++) - free(table[level].slice); - - return false; -} - -static bool -needs_separate_stencil(const struct brw_context *brw, - struct brw_mipmap_tree *mt, - mesa_format format) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL) - return false; - - if (devinfo->must_use_separate_stencil) - return true; - - return brw->has_separate_stencil && brw->has_hiz; -} - -/** - * Choose the aux usage for this miptree. This function must be called fairly - * late in the miptree create process after we have a tiling. - */ -static void -brw_miptree_choose_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - assert(mt->aux_usage == ISL_AUX_USAGE_NONE); - - if (_mesa_is_format_color_format(mt->format)) { - if (mt->surf.samples > 1) { - mt->aux_usage = ISL_AUX_USAGE_MCS; - } else if (!INTEL_DEBUG(DEBUG_NO_RBC) && - format_supports_ccs_e(brw, mt->format)) { - mt->aux_usage = ISL_AUX_USAGE_CCS_E; - } else if (brw->mesa_format_supports_render[mt->format]) { - mt->aux_usage = ISL_AUX_USAGE_CCS_D; - } - } else if (isl_surf_usage_is_depth(mt->surf.usage) && brw->has_hiz) { - mt->aux_usage = ISL_AUX_USAGE_HIZ; - } - - /* We can do fast-clear on all auxiliary surface types that are - * allocated through the normal texture creation paths. - */ - if (mt->aux_usage != ISL_AUX_USAGE_NONE) - mt->supports_fast_clear = true; -} - - -/** - * Choose an appropriate uncompressed format for a requested - * compressed format, if unsupported. - */ -mesa_format -brw_lower_compressed_format(struct brw_context *brw, mesa_format format) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* No need to lower ETC formats on these platforms, - * they are supported natively. - */ - if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) - return format; - - switch (format) { - case MESA_FORMAT_ETC1_RGB8: - return MESA_FORMAT_R8G8B8X8_UNORM; - case MESA_FORMAT_ETC2_RGB8: - return MESA_FORMAT_R8G8B8X8_UNORM; - case MESA_FORMAT_ETC2_SRGB8: - case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: - case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: - return MESA_FORMAT_B8G8R8A8_SRGB; - case MESA_FORMAT_ETC2_RGBA8_EAC: - case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: - return MESA_FORMAT_R8G8B8A8_UNORM; - case MESA_FORMAT_ETC2_R11_EAC: - return MESA_FORMAT_R_UNORM16; - case MESA_FORMAT_ETC2_SIGNED_R11_EAC: - return MESA_FORMAT_R_SNORM16; - case MESA_FORMAT_ETC2_RG11_EAC: - return MESA_FORMAT_RG_UNORM16; - case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: - return MESA_FORMAT_RG_SNORM16; - default: - /* Non ETC1 / ETC2 format */ - return format; - } -} - -unsigned -brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level) -{ - if (mt->surf.dim == ISL_SURF_DIM_3D) - return minify(mt->surf.logical_level0_px.depth, level); - else - return mt->surf.logical_level0_px.array_len; -} - -UNUSED static unsigned -get_num_phys_layers(const struct isl_surf *surf, unsigned level) -{ - /* In case of physical dimensions one needs to consider also the layout. - * See isl_calc_phys_level0_extent_sa(). 
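brw_get_num_logical_layers() leans on minify(), which is defined elsewhere in the tree; a sketch of its usual Mesa definition, halving a dimension per level and clamping at 1:

    #include <assert.h>
    #include <stdint.h>

    /* Sketch of minify() as commonly defined in Mesa. */
    static inline uint32_t
    minify(uint32_t value, uint32_t levels)
    {
       return (value >> levels) ? (value >> levels) : 1;
    }

    int
    main(void)
    {
       /* A 3D texture of depth 16 has per-level depths 16, 8, 4, 2, 1, 1, ... */
       assert(minify(16, 0) == 16);
       assert(minify(16, 3) == 2);
       assert(minify(16, 5) == 1);
       return 0;
    }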
- */ - if (surf->dim != ISL_SURF_DIM_3D) - return surf->phys_level0_sa.array_len; - - if (surf->dim_layout == ISL_DIM_LAYOUT_GFX4_2D) - return minify(surf->phys_level0_sa.array_len, level); - - return minify(surf->phys_level0_sa.depth, level); -} - -/** \brief Assert that the level and layer are valid for the miptree. */ -void -brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt, - uint32_t level, - uint32_t layer) -{ - (void) mt; - (void) level; - (void) layer; - - assert(level >= mt->first_level); - assert(level <= mt->last_level); - assert(layer < get_num_phys_layers(&mt->surf, level)); -} - -static enum isl_aux_state ** -create_aux_state_map(struct brw_mipmap_tree *mt, - enum isl_aux_state initial) -{ - const uint32_t levels = mt->last_level + 1; - - uint32_t total_slices = 0; - for (uint32_t level = 0; level < levels; level++) - total_slices += brw_get_num_logical_layers(mt, level); - - const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *); - - /* We're going to allocate a single chunk of data for both the per-level - * reference array and the arrays of aux_state. This makes cleanup - * significantly easier. - */ - const size_t total_size = per_level_array_size + - total_slices * sizeof(enum isl_aux_state); - void *data = malloc(total_size); - if (data == NULL) - return NULL; - - enum isl_aux_state **per_level_arr = data; - enum isl_aux_state *s = data + per_level_array_size; - for (uint32_t level = 0; level < levels; level++) { - per_level_arr[level] = s; - const unsigned level_layers = brw_get_num_logical_layers(mt, level); - for (uint32_t a = 0; a < level_layers; a++) - *(s++) = initial; - } - assert((void *)s == data + total_size); - - return per_level_arr; -} - -static void -free_aux_state_map(enum isl_aux_state **state) -{ - free(state); -} - -static bool -need_to_retile_as_linear(struct brw_context *brw, unsigned blt_pitch, - enum isl_tiling tiling, unsigned samples) -{ - if (samples > 1) - return false; - - if (tiling == ISL_TILING_LINEAR) - return false; - - if (blt_pitch >= 32768) { - perf_debug("blt pitch %u too large to blit, falling back to untiled", - blt_pitch); - return true; - } - - return false; -} - -static bool -need_to_retile_as_x(const struct brw_context *brw, uint64_t size, - enum isl_tiling tiling) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* If the BO is too large to fit in the aperture, we need to use the - * BLT engine to support it. Prior to Sandybridge, the BLT paths can't - * handle Y-tiling, so we need to fall back to X. 
- */ - if (devinfo->ver < 6 && size >= brw->max_gtt_map_object_size && - tiling == ISL_TILING_Y0) - return true; - - return false; -} - -static struct brw_mipmap_tree * -make_surface(struct brw_context *brw, GLenum target, mesa_format format, - unsigned first_level, unsigned last_level, - unsigned width0, unsigned height0, unsigned depth0, - unsigned num_samples, isl_tiling_flags_t tiling_flags, - isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags, - unsigned row_pitch_B, struct brw_bo *bo) -{ - struct brw_mipmap_tree *mt = calloc(sizeof(*mt), 1); - if (!mt) - return NULL; - - if (!create_mapping_table(target, first_level, last_level, depth0, - mt->level)) { - free(mt); - return NULL; - } - - mt->refcount = 1; - - if (target == GL_TEXTURE_CUBE_MAP || - target == GL_TEXTURE_CUBE_MAP_ARRAY) - isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT; - - DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n", - __func__, - _mesa_enum_to_string(target), - _mesa_get_format_name(format), - num_samples, width0, height0, depth0, - first_level, last_level, mt); - - struct isl_surf_init_info init_info = { - .dim = get_isl_surf_dim(target), - .format = translate_tex_format(brw, format, false), - .width = width0, - .height = height0, - .depth = target == GL_TEXTURE_3D ? depth0 : 1, - .levels = last_level - first_level + 1, - .array_len = target == GL_TEXTURE_3D ? 1 : depth0, - .samples = num_samples, - .row_pitch_B = row_pitch_B, - .usage = isl_usage_flags, - .tiling_flags = tiling_flags, - }; - - if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) - goto fail; - - /* Depth surfaces are always Y-tiled and stencil is always W-tiled, although - * on gfx7 platforms we also need to create Y-tiled copies of stencil for - * texturing since the hardware can't sample from W-tiled surfaces. For - * everything else, check for corner cases needing special treatment. - */ - bool is_depth_stencil = - mt->surf.usage & (ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_DEPTH_BIT); - if (!is_depth_stencil) { - if (need_to_retile_as_linear(brw, brw_miptree_blt_pitch(mt), - mt->surf.tiling, mt->surf.samples)) { - init_info.tiling_flags = 1u << ISL_TILING_LINEAR; - if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) - goto fail; - } else if (need_to_retile_as_x(brw, mt->surf.size_B, mt->surf.tiling)) { - init_info.tiling_flags = 1u << ISL_TILING_X; - if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) - goto fail; - } - } - - /* In case of linear the buffer gets padded by fixed 64 bytes and therefore - * the size may not be multiple of row_pitch. - * See isl_apply_surface_padding(). - */ - if (mt->surf.tiling != ISL_TILING_LINEAR) - assert(mt->surf.size_B % mt->surf.row_pitch_B == 0); - - if (!bo) { - mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree", - mt->surf.size_B, - BRW_MEMZONE_OTHER, - isl_tiling_to_i915_tiling( - mt->surf.tiling), - mt->surf.row_pitch_B, alloc_flags); - if (!mt->bo) - goto fail; - } else { - mt->bo = bo; - } - - mt->first_level = first_level; - mt->last_level = last_level; - mt->target = target; - mt->format = format; - mt->aux_state = NULL; - mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8; - mt->compressed = _mesa_is_format_compressed(format); - mt->drm_modifier = DRM_FORMAT_MOD_INVALID; - - return mt; - -fail: - brw_miptree_release(&mt); - return NULL; -} - -/* Return the usual surface usage flags for the given format. 
*/ -static isl_surf_usage_flags_t -mt_surf_usage(mesa_format format) -{ - switch (_mesa_get_format_base_format(format)) { - case GL_DEPTH_COMPONENT: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - case GL_DEPTH_STENCIL: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT | - ISL_SURF_USAGE_TEXTURE_BIT; - case GL_STENCIL_INDEX: - return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - default: - return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - } -} - -static struct brw_mipmap_tree * -miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - enum brw_miptree_create_flags flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const uint32_t alloc_flags = - (flags & MIPTREE_CREATE_BUSY || num_samples > 1) ? BO_ALLOC_BUSY : 0; - isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK; - - /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */ - if (devinfo->ver < 6 && _mesa_is_format_color_format(format)) - tiling_flags &= ~ISL_TILING_Y0_BIT; - - mesa_format mt_fmt = format; - if (!_mesa_is_format_color_format(format) && devinfo->ver >= 6) { - /* Fix up the Z miptree format for how we're splitting out separate - * stencil. Gfx7 expects there to be no stencil bits in its depth buffer. - */ - mt_fmt = brw_depth_format_for_depthstencil_format(format); - } - - struct brw_mipmap_tree *mt = - make_surface(brw, target, mt_fmt, first_level, last_level, - width0, height0, depth0, num_samples, - tiling_flags, mt_surf_usage(mt_fmt), - alloc_flags, 0, NULL); - - if (mt == NULL) - return NULL; - - if (brw_miptree_needs_fake_etc(brw, mt)) { - mesa_format decomp_format = brw_lower_compressed_format(brw, format); - mt->shadow_mt = make_surface(brw, target, decomp_format, first_level, - last_level, width0, height0, depth0, - num_samples, tiling_flags, - mt_surf_usage(decomp_format), - alloc_flags, 0, NULL); - - if (mt->shadow_mt == NULL) { - brw_miptree_release(&mt); - return NULL; - } - } - - if (needs_separate_stencil(brw, mt, format)) { - mt->stencil_mt = - make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level, last_level, - width0, height0, depth0, num_samples, - ISL_TILING_W_BIT, mt_surf_usage(MESA_FORMAT_S_UINT8), - alloc_flags, 0, NULL); - if (mt->stencil_mt == NULL) { - brw_miptree_release(&mt); - return NULL; - } - } - - if (!(flags & MIPTREE_CREATE_NO_AUX)) - brw_miptree_choose_aux_usage(brw, mt); - - return mt; -} - -struct brw_mipmap_tree * -brw_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - enum brw_miptree_create_flags flags) -{ - assert(num_samples > 0); - - struct brw_mipmap_tree *mt = miptree_create( - brw, target, format, - first_level, last_level, - width0, height0, depth0, num_samples, - flags); - if (!mt) - return NULL; - - mt->offset = 0; - - /* Create the auxiliary surface up-front, except for CCS_D: since CCS_D can - * only compress clear color, we wait until an actual fast-clear to allocate - * it.
- */ - if (mt->aux_usage != ISL_AUX_USAGE_CCS_D && - !brw_miptree_alloc_aux(brw, mt)) { - mt->aux_usage = ISL_AUX_USAGE_NONE; - mt->supports_fast_clear = false; - } - - return mt; -} - -struct brw_mipmap_tree * -brw_miptree_create_for_bo(struct brw_context *brw, - struct brw_bo *bo, - mesa_format format, - uint32_t offset, - uint32_t width, - uint32_t height, - uint32_t depth, - int pitch, - enum isl_tiling tiling, - enum brw_miptree_create_flags flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_mipmap_tree *mt; - const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; - const GLenum base_format = _mesa_get_format_base_format(format); - - if ((base_format == GL_DEPTH_COMPONENT || - base_format == GL_DEPTH_STENCIL)) { - const mesa_format mt_fmt = (devinfo->ver < 6) ? format : - brw_depth_format_for_depthstencil_format(format); - mt = make_surface(brw, target, mt_fmt, - 0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT, - mt_surf_usage(mt_fmt), - 0, pitch, bo); - if (!mt) - return NULL; - - brw_bo_reference(bo); - - if (!(flags & MIPTREE_CREATE_NO_AUX)) - brw_miptree_choose_aux_usage(brw, mt); - - return mt; - } else if (format == MESA_FORMAT_S_UINT8) { - mt = make_surface(brw, target, MESA_FORMAT_S_UINT8, - 0, 0, width, height, depth, 1, - ISL_TILING_W_BIT, - mt_surf_usage(MESA_FORMAT_S_UINT8), - 0, pitch, bo); - if (!mt) - return NULL; - - assert(bo->size >= mt->surf.size_B); - - brw_bo_reference(bo); - return mt; - } - - /* Nothing will be able to use this miptree with the BO if the offset isn't - * aligned. - */ - if (tiling != ISL_TILING_LINEAR) - assert(offset % 4096 == 0); - - /* miptrees can't handle negative pitch. If you need flipping of images, - * that's outside of the scope of the mt. - */ - assert(pitch >= 0); - - mt = make_surface(brw, target, format, - 0, 0, width, height, depth, 1, - 1lu << tiling, - mt_surf_usage(format), - 0, pitch, bo); - if (!mt) - return NULL; - - brw_bo_reference(bo); - mt->bo = bo; - mt->offset = offset; - - if (!(flags & MIPTREE_CREATE_NO_AUX)) { - brw_miptree_choose_aux_usage(brw, mt); - - /* Create the auxiliary surface up-front. CCS_D, on the other hand, can - * only compress clear color so we wait until an actual fast-clear to - * allocate it. - */ - if (mt->aux_usage != ISL_AUX_USAGE_CCS_D && - !brw_miptree_alloc_aux(brw, mt)) { - mt->aux_usage = ISL_AUX_USAGE_NONE; - mt->supports_fast_clear = false; - } - } - - return mt; -} - -static struct brw_mipmap_tree * -miptree_create_for_planar_image(struct brw_context *brw, - __DRIimage *image, GLenum target, - enum isl_tiling tiling) -{ - const struct brw_image_format *f = image->planar_format; - struct brw_mipmap_tree *planar_mt = NULL; - - for (int i = 0; i < f->nplanes; i++) { - const int index = f->planes[i].buffer_index; - const uint32_t dri_format = f->planes[i].dri_format; - const mesa_format format = driImageFormatToGLFormat(dri_format); - const uint32_t width = image->width >> f->planes[i].width_shift; - const uint32_t height = image->height >> f->planes[i].height_shift; - - /* Disable creation of the texture's aux buffers because the driver - * exposes no EGL API to manage them. That is, there is no API for - * resolving the aux buffer's content to the main buffer nor for - * invalidating the aux buffer's content. 
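The import-time restrictions asserted in brw_miptree_create_for_bo() above condense to a small predicate. A hypothetical sketch, not driver code:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Tiled surfaces must start on a 4 KiB page boundary within the BO,
     * and a miptree never carries a negative (flipped) pitch.
     */
    static bool
    bo_import_args_ok(uint32_t offset, int pitch, bool is_linear)
    {
       if (!is_linear && (offset % 4096) != 0)
          return false;   /* tiled: offset must be page-aligned */
       return pitch >= 0; /* flipping is outside the miptree's scope */
    }

    int
    main(void)
    {
       /* A tiled import at offset 8192 with a positive pitch is fine. */
       printf("%d\n", bo_import_args_ok(8192, 4096, false)); /* prints 1 */
       return 0;
    }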
- */ - struct brw_mipmap_tree *mt = - brw_miptree_create_for_bo(brw, image->bo, format, - image->offsets[index], - width, height, 1, - image->strides[index], - tiling, - MIPTREE_CREATE_NO_AUX); - if (mt == NULL) { - brw_miptree_release(&planar_mt); - return NULL; - } - - mt->target = target; - - if (i == 0) - planar_mt = mt; - else - planar_mt->plane[i - 1] = mt; - } - - planar_mt->drm_modifier = image->modifier; - - return planar_mt; -} - -static bool -create_ccs_buf_for_image(struct brw_context *brw, - __DRIimage *image, - struct brw_mipmap_tree *mt, - enum isl_aux_state initial_state) -{ - struct isl_surf temp_ccs_surf = {0,}; - - /* CCS is only supported for very simple miptrees */ - assert(image->aux_offset != 0 && image->aux_pitch != 0); - assert(image->tile_x == 0 && image->tile_y == 0); - assert(mt->surf.samples == 1); - assert(mt->surf.levels == 1); - assert(mt->surf.logical_level0_px.depth == 1); - assert(mt->surf.logical_level0_px.array_len == 1); - assert(mt->first_level == 0); - assert(mt->last_level == 0); - - /* We shouldn't already have a CCS */ - assert(!mt->aux_buf); - - if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, NULL, - &temp_ccs_surf, image->aux_pitch)) - return false; - - assert(image->aux_offset < image->bo->size); - assert(temp_ccs_surf.size_B <= image->bo->size - image->aux_offset); - - mt->aux_buf = calloc(sizeof(*mt->aux_buf), 1); - if (mt->aux_buf == NULL) - return false; - - mt->aux_state = create_aux_state_map(mt, initial_state); - if (!mt->aux_state) { - free(mt->aux_buf); - mt->aux_buf = NULL; - return false; - } - - /* On gfx10+ we start using an extra space in the aux buffer to store the - * indirect clear color. However, if we imported an image from the window - * system with CCS, we don't have the extra space at the end of the aux - * buffer. So create a new bo here that will store that clear color. - */ - if (brw->isl_dev.ss.clear_color_state_size > 0) { - mt->aux_buf->clear_color_bo = - brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo", - brw->isl_dev.ss.clear_color_state_size, - BRW_MEMZONE_OTHER, I915_TILING_NONE, 0, - BO_ALLOC_ZEROED); - if (!mt->aux_buf->clear_color_bo) { - free(mt->aux_buf); - mt->aux_buf = NULL; - return false; - } - } - - mt->aux_buf->bo = image->bo; - brw_bo_reference(image->bo); - - mt->aux_buf->offset = image->aux_offset; - mt->aux_buf->surf = temp_ccs_surf; - - return true; -} - -struct brw_mipmap_tree * -brw_miptree_create_for_dri_image(struct brw_context *brw, - __DRIimage *image, GLenum target, - mesa_format format, - bool allow_internal_aux) -{ - uint32_t bo_tiling, bo_swizzle; - brw_bo_get_tiling(image->bo, &bo_tiling, &bo_swizzle); - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(image->modifier); - - const enum isl_tiling tiling = - mod_info ? mod_info->tiling : isl_tiling_from_i915_tiling(bo_tiling); - - if (image->planar_format && image->planar_format->nplanes > 1) - return miptree_create_for_planar_image(brw, image, target, tiling); - - if (image->planar_format) - assert(image->planar_format->planes[0].dri_format == image->dri_format); - - if (!brw->ctx.TextureFormatSupported[format]) { - /* The texture storage paths in core Mesa detect if the driver does not - * support the user-requested format, and then searches for a - * fallback format. The DRIimage code bypasses core Mesa, though. So we - * do the fallbacks here for important formats. 
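A worked instance of the per-plane shift arithmetic in miptree_create_for_planar_image() above, assuming the conventional NV12 layout (full-resolution Y plane, 2x2-subsampled interleaved UV plane):

    #include <stdint.h>
    #include <stdio.h>

    /* Per-plane dimensions derive from the image size via shift amounts. */
    struct plane_def { unsigned width_shift, height_shift; };

    int
    main(void)
    {
       const struct plane_def nv12[2] = { {0, 0}, {1, 1} };
       const uint32_t width = 1920, height = 1080;

       for (int i = 0; i < 2; i++) {
          printf("plane %d: %ux%u\n", i,
                 width >> nv12[i].width_shift,
                 height >> nv12[i].height_shift);
       }
       return 0; /* plane 0: 1920x1080, plane 1: 960x540 */
    }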
- * - * We must support DRM_FOURCC_XBGR8888 textures because the Android - * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which - * the Chrome OS compositor consumes as dma_buf EGLImages. - */ - format = _mesa_format_fallback_rgbx_to_rgba(format); - } - - if (!brw->ctx.TextureFormatSupported[format]) - return NULL; - - enum brw_miptree_create_flags mt_create_flags = 0; - - /* If this image comes in from a window system, we have different - * requirements than if it comes in via an EGL import operation. Window - * system images can use any form of auxiliary compression we wish because - * they get "flushed" before being handed off to the window system and we - * have the opportunity to do resolves. Non window-system images, on the - * other hand, have no resolve point so we can't have aux without a - * modifier. - */ - if (!allow_internal_aux) - mt_create_flags |= MIPTREE_CREATE_NO_AUX; - - /* If we have a modifier which specifies aux, don't create one yet */ - if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) - mt_create_flags |= MIPTREE_CREATE_NO_AUX; - - /* Disable creation of the texture's aux buffers because the driver exposes - * no EGL API to manage them. That is, there is no API for resolving the aux - * buffer's content to the main buffer nor for invalidating the aux buffer's - * content. - */ - struct brw_mipmap_tree *mt = - brw_miptree_create_for_bo(brw, image->bo, format, - image->offset, image->width, image->height, 1, - image->pitch, tiling, mt_create_flags); - if (mt == NULL) - return NULL; - - mt->target = target; - mt->level[0].level_x = image->tile_x; - mt->level[0].level_y = image->tile_y; - mt->drm_modifier = image->modifier; - - /* Per "OES_EGL_image" error reporting, we report GL_INVALID_OPERATION - * for EGL images from non-tile-aligned surfaces on gfx4 and earlier - * hardware, which has trouble resolving back to the destination image due - * to alignment issues. - */ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - if (!devinfo->has_surface_tile_offset) { - uint32_t draw_x, draw_y; - brw_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y); - - if (draw_x != 0 || draw_y != 0) { - _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__); - brw_miptree_release(&mt); - return NULL; - } - } - - if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) { - assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E); - - mt->aux_usage = mod_info->aux_usage; - /* If we are a window system buffer, then we can support fast-clears - * even if the modifier doesn't support them by doing a partial resolve - * as part of the flush operation. - */ - mt->supports_fast_clear = - allow_internal_aux || mod_info->supports_clear_color; - - /* We don't know the actual state of the surface when we get it but we - * can make a pretty good guess based on the modifier. What we do know - * for sure is that it isn't in the AUX_INVALID state, so we just assume - * a worst case of compression. - */ - enum isl_aux_state initial_state = - isl_drm_modifier_get_default_aux_state(image->modifier); - - if (!create_ccs_buf_for_image(brw, image, mt, initial_state)) { - brw_miptree_release(&mt); - return NULL; - } - } - - /* Don't assume coherency for imported EGLImages. We don't know what - * external clients are going to do with them. They may scan them out. - */ - image->bo->cache_coherent = false; - - return mt; -} - -/** - * For a singlesample renderbuffer, this simply wraps the given BO with a - * miptree.
- * - * For a multisample renderbuffer, this wraps the window system's - * (singlesample) BO with a singlesample miptree attached to the - * brw_renderbuffer, then creates a multisample miptree attached to irb->mt - * that will contain the actual rendering (which is lazily resolved to - * irb->singlesample_mt). - */ -bool -brw_update_winsys_renderbuffer_miptree(struct brw_context *intel, - struct brw_renderbuffer *irb, - struct brw_mipmap_tree *singlesample_mt, - uint32_t width, uint32_t height, - uint32_t pitch) -{ - struct brw_mipmap_tree *multisample_mt = NULL; - struct gl_renderbuffer *rb = &irb->Base.Base; - mesa_format format = rb->Format; - const unsigned num_samples = MAX2(rb->NumSamples, 1); - - /* Only the front and back buffers, which are color buffers, are allocated - * through the image loader. - */ - assert(_mesa_get_format_base_format(format) == GL_RGB || - _mesa_get_format_base_format(format) == GL_RGBA); - - assert(singlesample_mt); - - if (num_samples == 1) { - brw_miptree_release(&irb->mt); - irb->mt = singlesample_mt; - - assert(!irb->singlesample_mt); - } else { - brw_miptree_release(&irb->singlesample_mt); - irb->singlesample_mt = singlesample_mt; - - if (!irb->mt || - irb->mt->surf.logical_level0_px.width != width || - irb->mt->surf.logical_level0_px.height != height) { - multisample_mt = brw_miptree_create_for_renderbuffer(intel, - format, - width, - height, - num_samples); - if (!multisample_mt) - goto fail; - - irb->need_downsample = false; - brw_miptree_release(&irb->mt); - irb->mt = multisample_mt; - } - } - return true; - -fail: - brw_miptree_release(&irb->mt); - return false; -} - -struct brw_mipmap_tree* -brw_miptree_create_for_renderbuffer(struct brw_context *brw, - mesa_format format, - uint32_t width, - uint32_t height, - uint32_t num_samples) -{ - struct brw_mipmap_tree *mt; - uint32_t depth = 1; - GLenum target = num_samples > 1 ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - - mt = brw_miptree_create(brw, target, format, 0, 0, - width, height, depth, num_samples, - MIPTREE_CREATE_BUSY); - if (!mt) - goto fail; - - return mt; - -fail: - brw_miptree_release(&mt); - return NULL; -} - -void -brw_miptree_reference(struct brw_mipmap_tree **dst, - struct brw_mipmap_tree *src) -{ - if (*dst == src) - return; - - brw_miptree_release(dst); - - if (src) { - src->refcount++; - DBG("%s %p refcount now %d\n", __func__, src, src->refcount); - } - - *dst = src; -} - -static void -brw_miptree_aux_buffer_free(struct brw_miptree_aux_buffer *aux_buf) -{ - if (aux_buf == NULL) - return; - - brw_bo_unreference(aux_buf->bo); - brw_bo_unreference(aux_buf->clear_color_bo); - - free(aux_buf); -} - -void -brw_miptree_release(struct brw_mipmap_tree **mt) -{ - if (!*mt) - return; - - DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1); - if (--(*mt)->refcount <= 0) { - GLuint i; - - DBG("%s deleting %p\n", __func__, *mt); - - brw_bo_unreference((*mt)->bo); - brw_miptree_release(&(*mt)->stencil_mt); - brw_miptree_release(&(*mt)->shadow_mt); - brw_miptree_aux_buffer_free((*mt)->aux_buf); - free_aux_state_map((*mt)->aux_state); - - brw_miptree_release(&(*mt)->plane[0]); - brw_miptree_release(&(*mt)->plane[1]); - - for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { - free((*mt)->level[i].slice); - } - - free(*mt); - } - *mt = NULL; -} - - -void -brw_get_image_dims(struct gl_texture_image *image, - int *width, int *height, int *depth) -{ - switch (image->TexObject->Target) { - case GL_TEXTURE_1D_ARRAY: - /* For a 1D Array texture the OpenGL API will treat the image height as - * the number of array slices. For Intel hardware, we treat the 1D array - * as a 2D Array with a height of 1. So, here we want to swap image - * height and depth. - */ - assert(image->Depth == 1); - *width = image->Width; - *height = 1; - *depth = image->Height; - break; - case GL_TEXTURE_CUBE_MAP: - /* For Cube maps, the mesa/main api layer gives us a depth of 1 even - * though we really have 6 slices. - */ - assert(image->Depth == 1); - *width = image->Width; - *height = image->Height; - *depth = 6; - break; - default: - *width = image->Width; - *height = image->Height; - *depth = image->Depth; - break; - } -} - -/** - * Can the image be pulled into a unified mipmap tree? This mirrors - * the completeness test in a lot of ways. - * - * Not sure whether I want to pass gl_texture_image here. - */ -bool -brw_miptree_match_image(struct brw_mipmap_tree *mt, - struct gl_texture_image *image) -{ - struct brw_texture_image *brw_image = brw_texture_image(image); - GLuint level = brw_image->base.Base.Level; - int width, height, depth; - - /* glTexImage* choose the texture object based on the target passed in, and - * objects can't change targets over their lifetimes, so this should be - * true. - */ - assert(image->TexObject->Target == mt->target); - - mesa_format mt_format = mt->format; - if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt) - mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT; - if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt) - mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT; - - if (_mesa_get_srgb_format_linear(image->TexFormat) != - _mesa_get_srgb_format_linear(mt_format)) - return false; - - brw_get_image_dims(image, &width, &height, &depth); - - if (mt->target == GL_TEXTURE_CUBE_MAP) - depth = 6; - - if (level >= mt->surf.levels) - return false; - - const unsigned level_depth = - mt->surf.dim == ISL_SURF_DIM_3D ? 
- minify(mt->surf.logical_level0_px.depth, level) : - mt->surf.logical_level0_px.array_len; - - return width == minify(mt->surf.logical_level0_px.width, level) && - height == minify(mt->surf.logical_level0_px.height, level) && - depth == level_depth && - MAX2(image->NumSamples, 1) == mt->surf.samples; -} - -void -brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt, - GLuint level, GLuint slice, - GLuint *x, GLuint *y) -{ - if (level == 0 && slice == 0) { - *x = mt->level[0].level_x; - *y = mt->level[0].level_y; - return; - } - - uint32_t x_offset_sa, y_offset_sa, z_offset_sa, array_offset; - - /* The miptree itself can have an offset only if it represents a single - * slice in an imported buffer object. - * See brw_miptree_create_for_dri_image(). - */ - assert(mt->level[0].level_x == 0); - assert(mt->level[0].level_y == 0); - - /* The given level is relative to level zero, while the miptree may - * represent just a subset of all levels starting from 'first_level'. - */ - assert(level >= mt->first_level); - level -= mt->first_level; - - const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0; - slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice; - isl_surf_get_image_offset_el(&mt->surf, level, slice, z, - &x_offset_sa, &y_offset_sa, - &z_offset_sa, &array_offset); - - *x = x_offset_sa; - *y = y_offset_sa; - assert(z_offset_sa == 0); - assert(array_offset == 0); -} - -/** - * Compute the offset (in bytes) from the start of the BO to the given x - * and y coordinate. For tiled BOs, the caller must ensure that x and y are - * multiples of the tile size. - */ -uint32_t -brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt, - uint32_t x, uint32_t y) -{ - int cpp = mt->cpp; - uint32_t pitch = mt->surf.row_pitch_B; - - switch (mt->surf.tiling) { - default: - unreachable("not reached"); - case ISL_TILING_LINEAR: - return y * pitch + x * cpp; - case ISL_TILING_X: - assert((x % (512 / cpp)) == 0); - assert((y % 8) == 0); - return y * pitch + x / (512 / cpp) * 4096; - case ISL_TILING_Y0: - assert((x % (128 / cpp)) == 0); - assert((y % 32) == 0); - return y * pitch + x / (128 / cpp) * 4096; - } -} - -/** - * Rendering with tiled buffers requires that the base address of the buffer - * be aligned to a page boundary. For renderbuffers, and sometimes with - * textures, we may want the surface to point at a texture image level that - * isn't at a page boundary. - * - * This function returns an appropriately-aligned base offset - * according to the tiling restrictions, plus any required x/y offset - * from there.
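The X-tiling case of brw_miptree_get_aligned_offset() above can be rehearsed standalone. A sketch with hypothetical names, assuming the 512-byte-by-8-row X-tile geometry that the asserts encode:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* A tile-aligned (x, y) maps to y * pitch plus 4096 bytes for every
     * whole tile the x coordinate spans.
     */
    static uint32_t
    x_tiled_aligned_offset(uint32_t x, uint32_t y, uint32_t pitch, uint32_t cpp)
    {
       assert(x % (512 / cpp) == 0);
       assert(y % 8 == 0);
       return y * pitch + x / (512 / cpp) * 4096;
    }

    int
    main(void)
    {
       /* With 4 bytes per pixel, tiles are 128 px wide: (256, 16) with a
        * 4096-byte pitch lands two tiles in and sixteen rows down.
        */
       printf("%u\n", x_tiled_aligned_offset(256, 16, 4096, 4));
       return 0; /* prints 73728 = 16 * 4096 + 2 * 4096 */
    }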
- */ -uint32_t -brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt, - GLuint level, GLuint slice, - uint32_t *tile_x, - uint32_t *tile_y) -{ - uint32_t x, y; - uint32_t mask_x, mask_y; - - isl_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y); - brw_miptree_get_image_offset(mt, level, slice, &x, &y); - - *tile_x = x & mask_x; - *tile_y = y & mask_y; - - return brw_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y); -} - -static void -brw_miptree_copy_slice_sw(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer, - unsigned width, unsigned height) -{ - void *src, *dst; - ptrdiff_t src_stride, dst_stride; - const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8); - - brw_miptree_map(brw, src_mt, - src_level, src_layer, - 0, 0, - width, height, - GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, - &src, &src_stride); - - brw_miptree_map(brw, dst_mt, - dst_level, dst_layer, - 0, 0, - width, height, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | - BRW_MAP_DIRECT_BIT, - &dst, &dst_stride); - - DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n", - _mesa_get_format_name(src_mt->format), - src_mt, src, src_stride, - _mesa_get_format_name(dst_mt->format), - dst_mt, dst, dst_stride, - width, height); - - int row_size = cpp * width; - if (src_stride == row_size && - dst_stride == row_size) { - memcpy(dst, src, row_size * height); - } else { - for (int i = 0; i < height; i++) { - memcpy(dst, src, row_size); - dst += dst_stride; - src += src_stride; - } - } - - brw_miptree_unmap(brw, dst_mt, dst_level, dst_layer); - brw_miptree_unmap(brw, src_mt, src_level, src_layer); - - /* Don't forget to copy the stencil data over, too. We could have skipped - * passing BRW_MAP_DIRECT_BIT, but that would have meant brw_miptree_map - * shuffling the two data sources in/out of temporary storage instead of - * the direct mapping we get this way. - */ - if (dst_mt->stencil_mt) { - assert(src_mt->stencil_mt); - brw_miptree_copy_slice_sw(brw, - src_mt->stencil_mt, src_level, src_layer, - dst_mt->stencil_mt, dst_level, dst_layer, - width, height); - } -} - -void -brw_miptree_copy_slice(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - mesa_format format = src_mt->format; - unsigned width = minify(src_mt->surf.phys_level0_sa.width, - src_level - src_mt->first_level); - unsigned height = minify(src_mt->surf.phys_level0_sa.height, - src_level - src_mt->first_level); - - assert(src_layer < get_num_phys_layers(&src_mt->surf, - src_level - src_mt->first_level)); - - assert(_mesa_get_srgb_format_linear(src_mt->format) == - _mesa_get_srgb_format_linear(dst_mt->format)); - - DBG("validate blit mt %s %p %d,%d -> mt %s %p %d,%d (%dx%d)\n", - _mesa_get_format_name(src_mt->format), - src_mt, src_level, src_layer, - _mesa_get_format_name(dst_mt->format), - dst_mt, dst_level, dst_layer, - width, height); - - if (devinfo->ver >= 6) { - /* On gfx6 and above, we just use blorp. It's faster than the blitter - * and can handle everything without software fallbacks. 
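The stride-aware copy at the heart of brw_miptree_copy_slice_sw() is a reusable pattern: collapse to a single memcpy only when both surfaces are tightly packed. A standalone sketch:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    static void
    copy_rows(char *dst, ptrdiff_t dst_stride,
              const char *src, ptrdiff_t src_stride,
              size_t row_size, unsigned height)
    {
       if (src_stride == (ptrdiff_t)row_size &&
           dst_stride == (ptrdiff_t)row_size) {
          memcpy(dst, src, row_size * height); /* one tight copy */
          return;
       }
       for (unsigned i = 0; i < height; i++) { /* padded: row by row */
          memcpy(dst, src, row_size);
          dst += dst_stride;
          src += src_stride;
       }
    }

    int
    main(void)
    {
       const char src[2][8] = { "row0", "row1" }; /* stride 8 */
       char dst[2][16] = { { 0 } };               /* stride 16 */
       copy_rows(&dst[0][0], 16, &src[0][0], 8, 5, 2); /* 5 bytes incl. NUL */
       puts(dst[0]);
       puts(dst[1]);
       return 0;
    }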
- */ - brw_blorp_copy_miptrees(brw, - src_mt, src_level, src_layer, - dst_mt, dst_level, dst_layer, - 0, 0, 0, 0, width, height); - - if (src_mt->stencil_mt) { - assert(dst_mt->stencil_mt); - brw_blorp_copy_miptrees(brw, - src_mt->stencil_mt, src_level, src_layer, - dst_mt->stencil_mt, dst_level, dst_layer, - 0, 0, 0, 0, width, height); - } - return; - } - - if (dst_mt->compressed) { - unsigned int i, j; - _mesa_get_format_block_size(dst_mt->format, &i, &j); - height = ALIGN_NPOT(height, j) / j; - width = ALIGN_NPOT(width, i) / i; - } - - /* Gfx4-5 doesn't support separate stencil */ - assert(!src_mt->stencil_mt); - - uint32_t dst_x, dst_y, src_x, src_y; - brw_miptree_get_image_offset(dst_mt, dst_level, dst_layer, &dst_x, &dst_y); - brw_miptree_get_image_offset(src_mt, src_level, src_layer, &src_x, &src_y); - - DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n", - _mesa_get_format_name(src_mt->format), - src_mt, src_x, src_y, src_mt->surf.row_pitch_B, - _mesa_get_format_name(dst_mt->format), - dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch_B, - width, height); - - if (!brw_miptree_blit(brw, - src_mt, src_level, src_layer, 0, 0, false, - dst_mt, dst_level, dst_layer, 0, 0, false, - width, height, COLOR_LOGICOP_COPY)) { - perf_debug("miptree validate blit for %s failed\n", - _mesa_get_format_name(format)); - - brw_miptree_copy_slice_sw(brw, - src_mt, src_level, src_layer, - dst_mt, dst_level, dst_layer, - width, height); - } -} - -/** - * Copies the image's current data to the given miptree, and associates that - * miptree with the image. - */ -void -brw_miptree_copy_teximage(struct brw_context *brw, - struct brw_texture_image *brw_image, - struct brw_mipmap_tree *dst_mt) -{ - struct brw_mipmap_tree *src_mt = brw_image->mt; - struct brw_texture_object *intel_obj = - brw_texture_object(brw_image->base.Base.TexObject); - int level = brw_image->base.Base.Level; - const unsigned face = brw_image->base.Base.Face; - unsigned start_layer, end_layer; - - if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) { - assert(face == 0); - assert(brw_image->base.Base.Height); - start_layer = 0; - end_layer = brw_image->base.Base.Height - 1; - } else if (face > 0) { - start_layer = face; - end_layer = face; - } else { - assert(brw_image->base.Base.Depth); - start_layer = 0; - end_layer = brw_image->base.Base.Depth - 1; - } - - for (unsigned i = start_layer; i <= end_layer; i++) { - brw_miptree_copy_slice(brw, src_mt, level, i, dst_mt, level, i); - } - - brw_miptree_reference(&brw_image->mt, dst_mt); - intel_obj->needs_validate = true; -} - -static struct brw_miptree_aux_buffer * -brw_alloc_aux_buffer(struct brw_context *brw, - const struct isl_surf *aux_surf, - bool wants_memset, - uint8_t memset_value) -{ - struct brw_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); - if (!buf) - return false; - - uint64_t size = aux_surf->size_B; - - const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size > 0; - if (has_indirect_clear) { - /* On CNL+, instead of setting the clear color in the SURFACE_STATE, we - * will set a pointer to a dword somewhere that contains the color. So, - * allocate the space for the clear color value here on the aux buffer. 
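A sketch of the resulting BO layout, using a hypothetical aux_layout struct for illustration: the aux data sits at the front and the indirect clear color dwords are appended at the end, with clear_color_offset recording where they start.

    #include <inttypes.h>
    #include <stdio.h>

    struct aux_layout {
       uint64_t size;               /* total BO size to allocate */
       uint64_t clear_color_offset; /* equals the aux surface size when appended */
    };

    static struct aux_layout
    aux_bo_layout(uint64_t aux_surf_size, uint32_t clear_color_state_size)
    {
       struct aux_layout l = { aux_surf_size, 0 };
       if (clear_color_state_size > 0) { /* indirect clear color in use */
          l.clear_color_offset = l.size;
          l.size += clear_color_state_size;
       }
       return l;
    }

    int
    main(void)
    {
       /* e.g. a 64 KiB CCS with a 64-byte clear color block appended */
       struct aux_layout l = aux_bo_layout(65536, 64);
       printf("size=%" PRIu64 " clear_color_offset=%" PRIu64 "\n",
              l.size, l.clear_color_offset);
       return 0; /* size=65600 clear_color_offset=65536 */
    }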
- */ - buf->clear_color_offset = size; - size += brw->isl_dev.ss.clear_color_state_size; - } - - /* If the buffer needs to be initialized (requiring it to be mapped to CPU - * space for writing immediately), do not use the GPU-access flag, which - * can cause an unnecessary delay if the backing pages happen to have just - * been used by the GPU. - */ - const bool alloc_zeroed = wants_memset && memset_value == 0; - const bool needs_memset = - !alloc_zeroed && (wants_memset || has_indirect_clear); - const uint32_t alloc_flags = - alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 : BO_ALLOC_BUSY); - - /* ISL has a stricter set of alignment rules than the DRM allocator. - * Therefore one can pass the ISL dimensions in terms of bytes instead of - * trying to recalculate them based on different format block sizes. - */ - buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", size, - BRW_MEMZONE_OTHER, I915_TILING_Y, - aux_surf->row_pitch_B, alloc_flags); - if (!buf->bo) { - free(buf); - return NULL; - } - - /* Initialize the BO to the desired value */ - if (needs_memset) { - assert(!(alloc_flags & BO_ALLOC_BUSY)); - - void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW); - if (map == NULL) { - brw_miptree_aux_buffer_free(buf); - return NULL; - } - - /* Memset the aux_surf portion of the BO. */ - if (wants_memset) - memset(map, memset_value, aux_surf->size_B); - - /* Zero the indirect clear color to match ::fast_clear_color. */ - if (has_indirect_clear) { - memset((char *)map + buf->clear_color_offset, 0, - brw->isl_dev.ss.clear_color_state_size); - } - - brw_bo_unmap(buf->bo); - } - - if (has_indirect_clear) { - buf->clear_color_bo = buf->bo; - brw_bo_reference(buf->clear_color_bo); - } - - buf->surf = *aux_surf; - - return buf; -} - - -/** - * Helper for brw_miptree_alloc_aux() that sets - * \c mt->level[level].has_hiz. Return true if and only if - * \c has_hiz was set. - */ -static bool -brw_miptree_level_enable_hiz(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t level) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(mt->aux_buf); - assert(mt->surf.size_B > 0); - - if (devinfo->verx10 >= 75) { - uint32_t width = minify(mt->surf.phys_level0_sa.width, level); - uint32_t height = minify(mt->surf.phys_level0_sa.height, level); - - /* Disable HiZ for LOD > 0 unless the width is 8-aligned - * and the height is 4-aligned. This allows our HiZ support - * to fulfill Haswell restrictions for HiZ ops. For LOD == 0, - * we can grow the width & height to allow the HiZ op to - * force the proper size alignments. - */ - if (level > 0 && ((width & 7) || (height & 3))) { - DBG("mt %p level %d: HiZ DISABLED\n", mt, level); - return false; - } - } - - DBG("mt %p level %d: HiZ enabled\n", mt, level); - mt->level[level].has_hiz = true; - return true; -} - - -/** - * Allocate the initial aux surface for a miptree based on mt->aux_usage. - * - * Since MCS, HiZ, and CCS_E can compress more than just clear color, we - * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only - * compress clear color so we wait until an actual fast-clear to allocate it. - */ -bool -brw_miptree_alloc_aux(struct brw_context *brw, struct brw_mipmap_tree *mt) -{ - assert(mt->aux_buf == NULL); - - /* Get the aux buf allocation parameters for this miptree.
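The Haswell LOD-alignment rule from brw_miptree_level_enable_hiz() above reduces to a one-line predicate. A hypothetical standalone sketch:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* LOD 0 can always use HiZ (its size can be grown to fit); higher
     * LODs need an 8-aligned width and a 4-aligned height.
     */
    static bool
    level_can_use_hiz_hsw(uint32_t level, uint32_t width, uint32_t height)
    {
       return level == 0 || ((width & 7) == 0 && (height & 3) == 0);
    }

    int
    main(void)
    {
       /* A 100x33 LOD 1 fails the rule; LOD 0 never does. */
       printf("%d %d\n", level_can_use_hiz_hsw(1, 100, 33),
              level_can_use_hiz_hsw(0, 100, 33)); /* prints 0 1 */
       return 0;
    }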
*/ - enum isl_aux_state initial_state; - uint8_t memset_value; - struct isl_surf aux_surf = {0,}; - bool aux_surf_ok = false; - - switch (mt->aux_usage) { - case ISL_AUX_USAGE_NONE: - aux_surf.size_B = 0; - aux_surf_ok = true; - break; - case ISL_AUX_USAGE_HIZ: - initial_state = ISL_AUX_STATE_AUX_INVALID; - memset_value = 0; - aux_surf_ok = isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &aux_surf); - break; - case ISL_AUX_USAGE_MCS: - /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: - * - * When MCS buffer is enabled and bound to MSRT, it is required that - * it is cleared prior to any rendering. - * - * Since we don't use the MCS buffer for any purpose other than - * rendering, it makes sense to just clear it immediately upon - * allocation. - * - * Note: the clear value for MCS buffers is all 1's, so we memset to - * 0xff. - */ - initial_state = ISL_AUX_STATE_CLEAR; - memset_value = 0xFF; - aux_surf_ok = isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &aux_surf); - break; - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - /* When CCS_E is used, we need to ensure that the CCS starts off in a - * valid state. From the Sky Lake PRM, "MCS Buffer for Render - * Target(s)": - * - * "If Software wants to enable Color Compression without Fast - * clear, Software needs to initialize MCS with zeros." - * - * A CCS value of 0 indicates that the corresponding block is in the - * pass-through state which is what we want. - * - * For CCS_D, do the same thing. On gfx9+, this avoids having any - * undefined bits in the aux buffer. - */ - initial_state = ISL_AUX_STATE_PASS_THROUGH; - memset_value = 0; - aux_surf_ok = - isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, NULL, &aux_surf, 0); - break; - - default: - unreachable("Invalid aux usage"); - } - - /* We should have a valid aux_surf. */ - if (!aux_surf_ok) - return false; - - /* No work is needed for a zero-sized auxiliary buffer. */ - if (aux_surf.size_B == 0) - return true; - - /* Create the aux_state for the auxiliary buffer. */ - mt->aux_state = create_aux_state_map(mt, initial_state); - if (mt->aux_state == NULL) - return false; - - /* Allocate the auxiliary buffer. */ - const bool needs_memset = initial_state != ISL_AUX_STATE_AUX_INVALID; - mt->aux_buf = brw_alloc_aux_buffer(brw, &aux_surf, needs_memset, - memset_value); - if (mt->aux_buf == NULL) { - free_aux_state_map(mt->aux_state); - mt->aux_state = NULL; - return false; - } - - /* Perform aux_usage-specific initialization. */ - if (mt->aux_usage == ISL_AUX_USAGE_HIZ) { - for (unsigned level = mt->first_level; level <= mt->last_level; ++level) - brw_miptree_level_enable_hiz(brw, mt, level); - } - - return true; -} - - -/** - * Can the miptree sample using the hiz buffer? - */ -bool -brw_miptree_sample_with_hiz(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (!devinfo->has_sample_with_hiz) { - return false; - } - - if (!mt->aux_buf) { - return false; - } - - for (unsigned level = 0; level < mt->surf.levels; ++level) { - if (!brw_miptree_level_has_hiz(mt, level)) - return false; - } - - /* From the BDW PRM (Volume 2d: Command Reference: Structures - * RENDER_SURFACE_STATE.AuxiliarySurfaceMode): - * - * "If this field is set to AUX_HIZ, Number of Multisamples must be - * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D. - * - * There is no such blurb for 1D textures, but there is sufficient evidence - * that this is broken on SKL+. 
- */ - return (mt->surf.samples == 1 && - mt->target != GL_TEXTURE_3D && - mt->target != GL_TEXTURE_1D /* gfx9+ restriction */); -} - -static bool -level_has_aux(const struct brw_mipmap_tree *mt, uint32_t level) -{ - return isl_aux_usage_has_hiz(mt->aux_usage) ? - brw_miptree_level_has_hiz(mt, level) : - mt->aux_usage != ISL_AUX_USAGE_NONE && mt->aux_buf; -} - -/** - * Does the miptree slice have HiZ enabled? - */ -bool -brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level) -{ - brw_miptree_check_level_layer(mt, level, 0); - return mt->level[level].has_hiz; -} - -static inline uint32_t -miptree_level_range_length(const struct brw_mipmap_tree *mt, - uint32_t start_level, uint32_t num_levels) -{ - assert(start_level >= mt->first_level); - assert(start_level <= mt->last_level); - - if (num_levels == INTEL_REMAINING_LEVELS) - num_levels = mt->last_level - start_level + 1; - /* Check for overflow */ - assert(start_level + num_levels >= start_level); - assert(start_level + num_levels <= mt->last_level + 1); - - return num_levels; -} - -static inline uint32_t -miptree_layer_range_length(const struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t num_layers) -{ - assert(level <= mt->last_level); - - const uint32_t total_num_layers = brw_get_num_logical_layers(mt, level); - assert(start_layer < total_num_layers); - if (num_layers == INTEL_REMAINING_LAYERS) - num_layers = total_num_layers - start_layer; - /* Check for overflow */ - assert(start_layer + num_layers >= start_layer); - assert(start_layer + num_layers <= total_num_layers); - - return num_layers; -} - -bool -brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt, - unsigned start_level, unsigned num_levels, - unsigned start_layer, unsigned num_layers) -{ - assert(_mesa_is_format_color_format(mt->format)); - - if (!mt->aux_buf) - return false; - - /* Clamp the level range to fit the miptree */ - num_levels = miptree_level_range_length(mt, start_level, num_levels); - - for (uint32_t l = 0; l < num_levels; l++) { - const uint32_t level = start_level + l; - const uint32_t level_layers = - miptree_layer_range_length(mt, level, start_layer, num_layers); - for (unsigned a = 0; a < level_layers; a++) { - enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, start_layer + a); - assert(aux_state != ISL_AUX_STATE_AUX_INVALID); - if (aux_state != ISL_AUX_STATE_PASS_THROUGH) - return true; - } - } - - return false; -} - -static void -brw_miptree_check_color_resolve(const struct brw_context *brw, - const struct brw_mipmap_tree *mt, - unsigned level, unsigned layer) -{ - if (!mt->aux_buf) - return; - - /* Fast color clear is supported for mipmapped surfaces only on Gfx8+. */ - assert(brw->screen->devinfo.ver >= 8 || - (level == 0 && mt->first_level == 0 && mt->last_level == 0)); - - /* Compression of arrayed msaa surfaces is supported. */ - if (mt->surf.samples > 1) - return; - - /* Fast color clear is supported for non-msaa arrays only on Gfx8+.
*/ - assert(brw->screen->devinfo.ver >= 8 || - (layer == 0 && - mt->surf.logical_level0_px.depth == 1 && - mt->surf.logical_level0_px.array_len == 1)); - - (void)level; - (void)layer; -} - -void -brw_miptree_prepare_access(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - const uint32_t clamped_levels = - miptree_level_range_length(mt, start_level, num_levels); - for (uint32_t l = 0; l < clamped_levels; l++) { - const uint32_t level = start_level + l; - if (!level_has_aux(mt, level)) - continue; - - const uint32_t level_layers = - miptree_layer_range_length(mt, level, start_layer, num_layers); - for (uint32_t a = 0; a < level_layers; a++) { - const uint32_t layer = start_layer + a; - const enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, layer); - const enum isl_aux_op aux_op = - isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported); - - if (aux_op == ISL_AUX_OP_NONE) { - /* Nothing to do here. */ - } else if (isl_aux_usage_has_mcs(mt->aux_usage)) { - assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE); - brw_blorp_mcs_partial_resolve(brw, mt, layer, 1); - } else if (isl_aux_usage_has_hiz(mt->aux_usage)) { - brw_hiz_exec(brw, mt, level, layer, 1, aux_op); - } else { - assert(isl_aux_usage_has_ccs(mt->aux_usage)); - brw_miptree_check_color_resolve(brw, mt, level, layer); - brw_blorp_resolve_color(brw, mt, level, layer, aux_op); - } - - const enum isl_aux_state new_state = - isl_aux_state_transition_aux_op(aux_state, mt->aux_usage, aux_op); - brw_miptree_set_aux_state(brw, mt, level, layer, 1, new_state); - } - } -} - -void -brw_miptree_finish_write(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t num_layers, - enum isl_aux_usage aux_usage) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (mt->format == MESA_FORMAT_S_UINT8 && devinfo->ver <= 7) { - mt->shadow_needs_update = true; - } else if (brw_miptree_has_etc_shadow(brw, mt)) { - mt->shadow_needs_update = true; - } - - if (!level_has_aux(mt, level)) - return; - - const uint32_t level_layers = - miptree_layer_range_length(mt, level, start_layer, num_layers); - - for (uint32_t a = 0; a < level_layers; a++) { - const uint32_t layer = start_layer + a; - const enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, layer); - const enum isl_aux_state new_aux_state = - isl_aux_state_transition_write(aux_state, aux_usage, false); - brw_miptree_set_aux_state(brw, mt, level, layer, 1, new_aux_state); - } -} - -enum isl_aux_state -brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt, - uint32_t level, uint32_t layer) -{ - brw_miptree_check_level_layer(mt, level, layer); - - if (_mesa_is_format_color_format(mt->format)) { - assert(mt->aux_buf != NULL); - assert(mt->surf.samples == 1 || - mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); - } else if (mt->format == MESA_FORMAT_S_UINT8) { - unreachable("Cannot get aux state for stencil"); - } else { - assert(brw_miptree_level_has_hiz(mt, level)); - } - - return mt->aux_state[level][layer]; -} - -void -brw_miptree_set_aux_state(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t num_layers, - enum isl_aux_state aux_state) -{ - num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers); - - if (_mesa_is_format_color_format(mt->format)) { - 
assert(mt->aux_buf != NULL); - assert(mt->surf.samples == 1 || - mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); - } else if (mt->format == MESA_FORMAT_S_UINT8) { - unreachable("Cannot set aux state for stencil"); - } else { - assert(brw_miptree_level_has_hiz(mt, level)); - } - - for (unsigned a = 0; a < num_layers; a++) { - if (mt->aux_state[level][start_layer + a] != aux_state) { - mt->aux_state[level][start_layer + a] = aux_state; - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - } - } -} - -/* On Gfx9, color buffers may be compressed by the hardware (lossless - * compression). There are, however, format restrictions, and care needs to - * be taken that the sampler engine is capable of re-interpreting a buffer - * with a format different from the one it was originally written with. - * - * For example, SRGB formats are not compressible and the sampler engine isn't - * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying - * color buffer needs to be resolved so that the sampling surface can be - * sampled as non-compressed (i.e., without the auxiliary MCS buffer being - * set). - */ -static bool -can_texture_with_ccs(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format) -{ - if (mt->aux_usage != ISL_AUX_USAGE_CCS_E) - return false; - - if (!format_ccs_e_compat_with_miptree(&brw->screen->devinfo, - mt, view_format)) { - perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", - isl_format_get_name(view_format), - _mesa_get_format_name(mt->format)); - return false; - } - - return true; -} - -enum isl_aux_usage -brw_miptree_texture_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits) -{ - assert(brw->screen->devinfo.ver == 9 || astc5x5_wa_bits == 0); - - /* On gfx9, ASTC 5x5 textures cannot live in the sampler cache alongside - * CCS or HiZ compressed textures. See gfx9_apply_astc5x5_wa_flush() for - * details. - */ - if ((astc5x5_wa_bits & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && - mt->aux_usage != ISL_AUX_USAGE_MCS) - return ISL_AUX_USAGE_NONE; - - switch (mt->aux_usage) { - case ISL_AUX_USAGE_HIZ: - if (brw_miptree_sample_with_hiz(brw, mt)) - return ISL_AUX_USAGE_HIZ; - break; - - case ISL_AUX_USAGE_MCS: - return ISL_AUX_USAGE_MCS; - - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - if (!mt->aux_buf) { - assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D); - return ISL_AUX_USAGE_NONE; - } - - /* If we don't have any unresolved color, report an aux usage of - * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the - * aux surface and we can save some bandwidth. - */ - if (!brw_miptree_has_color_unresolved(mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS)) - return ISL_AUX_USAGE_NONE; - - if (can_texture_with_ccs(brw, mt, view_format)) - return ISL_AUX_USAGE_CCS_E; - break; - - default: - break; - } - - return ISL_AUX_USAGE_NONE; -} - -static bool -isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) -{ - /* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear - * values, so sRGB curve application was a no-op for all fast-clearable - * formats. - * - * On gfx9+, the hardware supports arbitrary clear values. For sRGB clear - * values, the hardware interprets the floats, not as what would be - * returned from the sampler (or written by the shader), but as being - * between format conversion and sRGB curve application.
This means that - * we can switch between sRGB and UNORM without having to whack the clear - * color. - */ - return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b); -} - -void -brw_miptree_prepare_texture(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits) -{ - enum isl_aux_usage aux_usage = - brw_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits); - - bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; - - /* Clear color is specified as ints or floats and the conversion is done by - * the sampler. If we have a texture view, we would have to perform the - * clear color conversion manually. Just disable clear color. - */ - if (!isl_formats_are_fast_clear_compatible(mt->surf.format, view_format)) - clear_supported = false; - - brw_miptree_prepare_access(brw, mt, start_level, num_levels, - start_layer, num_layers, - aux_usage, clear_supported); -} - -void -brw_miptree_prepare_image(struct brw_context *brw, struct brw_mipmap_tree *mt) -{ - /* The data port doesn't understand any compression */ - brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, - ISL_AUX_USAGE_NONE, false); -} - -enum isl_aux_usage -brw_miptree_render_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format render_format, - bool blend_enabled, - bool draw_aux_disabled) -{ - struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (draw_aux_disabled) - return ISL_AUX_USAGE_NONE; - - switch (mt->aux_usage) { - case ISL_AUX_USAGE_MCS: - assert(mt->aux_buf); - return ISL_AUX_USAGE_MCS; - - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - if (!mt->aux_buf) { - assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D); - return ISL_AUX_USAGE_NONE; - } - - /* gfx9+ hardware technically supports non-0/1 clear colors with sRGB - * formats. However, there are issues with blending where it doesn't - * properly apply the sRGB curve to the clear color when blending. 
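To illustrate the check below with assumed values: blending into an sRGB render target whose fast-clear color is (0.5, 0.5, 0.5, 1.0) would blend against a clear value that never passed through the sRGB curve (linear 0.5 encodes to roughly 0.735), so the driver falls back to ISL_AUX_USAGE_NONE; a 0/1 clear color such as (0, 0, 0, 1) is a fixed point of the curve and keeps compression.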
- */ - if (devinfo->ver >= 9 && blend_enabled && - isl_format_is_srgb(render_format) && - !isl_color_value_is_zero_one(mt->fast_clear_color, render_format)) - return ISL_AUX_USAGE_NONE; - - if (mt->aux_usage == ISL_AUX_USAGE_CCS_E && - format_ccs_e_compat_with_miptree(&brw->screen->devinfo, - mt, render_format)) - return ISL_AUX_USAGE_CCS_E; - - /* Otherwise, we have to fall back to CCS_D */ - return ISL_AUX_USAGE_CCS_D; - - default: - return ISL_AUX_USAGE_NONE; - } -} - -void -brw_miptree_prepare_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage) -{ - brw_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count, - aux_usage, aux_usage != ISL_AUX_USAGE_NONE); -} - -void -brw_miptree_finish_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage) -{ - assert(_mesa_is_format_color_format(mt->format)); - - brw_miptree_finish_write(brw, mt, level, start_layer, layer_count, - aux_usage); -} - -void -brw_miptree_prepare_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count) -{ - brw_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count, - mt->aux_usage, mt->aux_buf != NULL); -} - -void -brw_miptree_finish_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - bool depth_written) -{ - if (depth_written) { - brw_miptree_finish_write(brw, mt, level, start_layer, layer_count, - mt->aux_usage); - } -} - -void -brw_miptree_prepare_external(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE; - bool supports_fast_clear = false; - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(mt->drm_modifier); - - if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) { - /* CCS_E is the only supported aux for external images and it's only - * supported on very simple images. - */ - assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E); - assert(_mesa_is_format_color_format(mt->format)); - assert(mt->first_level == 0 && mt->last_level == 0); - assert(mt->surf.logical_level0_px.depth == 1); - assert(mt->surf.logical_level0_px.array_len == 1); - assert(mt->surf.samples == 1); - assert(mt->aux_buf != NULL); - - aux_usage = mod_info->aux_usage; - supports_fast_clear = mod_info->supports_clear_color; - } - - brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, - aux_usage, supports_fast_clear); -} - -void -brw_miptree_finish_external(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - if (!mt->aux_buf) - return; - - /* We don't know the actual aux state of the aux surface. The previous - * owner could have given it to us in a number of different states. - * Because we don't know the aux state, we reset the aux state to the - * least common denominator of possible valid states. - */ - enum isl_aux_state default_aux_state = - isl_drm_modifier_get_default_aux_state(mt->drm_modifier); - assert(mt->last_level == mt->first_level); - brw_miptree_set_aux_state(brw, mt, 0, 0, INTEL_REMAINING_LAYERS, - default_aux_state); -} - -/** - * Make it possible to share the BO backing the given miptree with another - * process or another miptree. 
- * - * Fast color clears are unsafe with shared buffers, so we need to resolve - * and then discard the aux buffer, if present. We also set aux_usage to - * ISL_AUX_USAGE_NONE to ensure that no aux buffer gets allocated in the - * future. - * - * HiZ is similarly unsafe with shared buffers. - */ -void -brw_miptree_make_shareable(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - /* MCS buffers are also used for multisample buffers, but we can't resolve - * away a multisample MCS buffer because it's an integral part of how the - * pixel data is stored. Fortunately this code path should never be - * reached for multisample buffers. - */ - assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE || - mt->surf.samples == 1); - - brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, - ISL_AUX_USAGE_NONE, false); - - if (mt->aux_buf) { - brw_miptree_aux_buffer_free(mt->aux_buf); - mt->aux_buf = NULL; - - /* Make future calls of brw_miptree_level_has_hiz() return false. */ - for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) { - mt->level[l].has_hiz = false; - } - - free(mt->aux_state); - mt->aux_state = NULL; - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - } - - mt->aux_usage = ISL_AUX_USAGE_NONE; - mt->supports_fast_clear = false; -} - - -/** - * \brief Get pointer offset into stencil buffer. - * - * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we - * must decode the tile's layout in software. - * - * See - * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile - * Format. - * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm - * - * Even though the returned offset is always positive, the return type is - * signed due to - * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 - * mesa: Fix return type of _mesa_get_format_bytes() (#37351) - */ -static intptr_t -brw_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled) -{ - uint32_t tile_size = 4096; - uint32_t tile_width = 64; - uint32_t tile_height = 64; - uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */ - - uint32_t tile_x = x / tile_width; - uint32_t tile_y = y / tile_height; - - /* The byte's address relative to the tile's base address.
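A worked example of the interleaving below (hypothetical coordinates, swizzling ignored): the byte at (x = 17, y = 5) lies in tile (0, 0) with byte_x = 17 and byte_y = 5, so

   u = 512 * (17 / 8)      /* 1024 */
     + 64 * (5 / 8)        /*    0 */
     + 32 * ((5 / 4) % 2)  /*   32 */
     + 16 * ((17 / 4) % 2) /*    0 */
     + 8 * ((5 / 2) % 2)   /*    0 */
     + 4 * ((17 / 2) % 2)  /*    0 */
     + 2 * (5 % 2)         /*    2 */
     + 1 * (17 % 2)        /*    1 */
     = 1059

bytes from the tile's base.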
*/ - uint32_t byte_x = x % tile_width; - uint32_t byte_y = y % tile_height; - - uintptr_t u = tile_y * row_size - + tile_x * tile_size - + 512 * (byte_x / 8) - + 64 * (byte_y / 8) - + 32 * ((byte_y / 4) % 2) - + 16 * ((byte_x / 4) % 2) - + 8 * ((byte_y / 2) % 2) - + 4 * ((byte_x / 2) % 2) - + 2 * (byte_y % 2) - + 1 * (byte_x % 2); - - if (swizzled) { - /* adjust for bit6 swizzling */ - if (((byte_x / 8) % 2) == 1) { - if (((byte_y / 8) % 2) == 0) { - u += 64; - } else { - u -= 64; - } - } - } - - return u; -} - -void -brw_miptree_updownsample(struct brw_context *brw, - struct brw_mipmap_tree *src, - struct brw_mipmap_tree *dst) -{ - unsigned src_w = src->surf.logical_level0_px.width; - unsigned src_h = src->surf.logical_level0_px.height; - unsigned dst_w = dst->surf.logical_level0_px.width; - unsigned dst_h = dst->surf.logical_level0_px.height; - - brw_blorp_blit_miptrees(brw, - src, 0 /* level */, 0 /* layer */, - src->format, SWIZZLE_XYZW, - dst, 0 /* level */, 0 /* layer */, dst->format, - 0, 0, src_w, src_h, - 0, 0, dst_w, dst_h, - GL_NEAREST, false, false /*mirror x, y*/, - false, false); - - if (src->stencil_mt) { - src_w = src->stencil_mt->surf.logical_level0_px.width; - src_h = src->stencil_mt->surf.logical_level0_px.height; - dst_w = dst->stencil_mt->surf.logical_level0_px.width; - dst_h = dst->stencil_mt->surf.logical_level0_px.height; - - brw_blorp_blit_miptrees(brw, - src->stencil_mt, 0 /* level */, 0 /* layer */, - src->stencil_mt->format, SWIZZLE_XYZW, - dst->stencil_mt, 0 /* level */, 0 /* layer */, - dst->stencil_mt->format, - 0, 0, src_w, src_h, - 0, 0, dst_w, dst_h, - GL_NEAREST, false, false /*mirror x, y*/, - false, false /* decode/encode srgb */); - } -} - -void -brw_update_r8stencil(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 7); - struct brw_mipmap_tree *src = - mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt; - if (!src || devinfo->ver >= 8) - return; - - assert(src->surf.size_B > 0); - - if (!mt->shadow_mt) { - assert(devinfo->ver > 6); /* Handle MIPTREE_LAYOUT_GFX6_HIZ_STENCIL */ - mt->shadow_mt = make_surface( - brw, - src->target, - MESA_FORMAT_R_UINT8, - src->first_level, src->last_level, - src->surf.logical_level0_px.width, - src->surf.logical_level0_px.height, - src->surf.dim == ISL_SURF_DIM_3D ? - src->surf.logical_level0_px.depth : - src->surf.logical_level0_px.array_len, - src->surf.samples, - ISL_TILING_Y0_BIT, - ISL_SURF_USAGE_TEXTURE_BIT, - BO_ALLOC_BUSY, 0, NULL); - assert(mt->shadow_mt); - } - - if (src->shadow_needs_update == false) - return; - - struct brw_mipmap_tree *dst = mt->shadow_mt; - - for (int level = src->first_level; level <= src->last_level; level++) { - const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ? 
- minify(src->surf.phys_level0_sa.depth, level) : - src->surf.phys_level0_sa.array_len; - - for (unsigned layer = 0; layer < depth; layer++) { - brw_blorp_copy_miptrees(brw, - src, level, layer, - dst, level, layer, - 0, 0, 0, 0, - minify(src->surf.logical_level0_px.width, - level), - minify(src->surf.logical_level0_px.height, - level)); - } - } - - brw_cache_flush_for_read(brw, dst->bo); - src->shadow_needs_update = false; -} - -static void * -brw_miptree_map_raw(struct brw_context *brw, - struct brw_mipmap_tree *mt, - GLbitfield mode) -{ - struct brw_bo *bo = mt->bo; - - if (brw_batch_references(&brw->batch, bo)) - brw_batch_flush(brw); - - return brw_bo_map(brw, bo, mode); -} - -static void -brw_miptree_unmap_raw(struct brw_mipmap_tree *mt) -{ - brw_bo_unmap(mt->bo); -} - -static void -brw_miptree_unmap_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - brw_miptree_unmap_raw(mt); -} - -static void -brw_miptree_map_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - unsigned int bw, bh; - void *base; - unsigned int image_x, image_y; - intptr_t x = map->x; - intptr_t y = map->y; - - /* For compressed formats, the stride is the number of bytes per - * row of blocks. brw_miptree_get_image_offset() already does - * the divide. - */ - _mesa_get_format_block_size(mt->format, &bw, &bh); - assert(y % bh == 0); - assert(x % bw == 0); - y /= bh; - x /= bw; - - brw_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - base = brw_miptree_map_raw(brw, mt, map->mode); - - if (base == NULL) - map->ptr = NULL; - else { - base += mt->offset; - - /* Note that in the case of cube maps, the caller must have passed the - * slice number referencing the face. - */ - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - x += image_x; - y += image_y; - - map->stride = mt->surf.row_pitch_B; - map->ptr = base + y * map->stride + x * mt->cpp; - } - - DBG("%s: %d,%d %dx%d from mt %p (%s) " - "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, _mesa_get_format_name(mt->format), - x, y, map->ptr, map->stride); - - map->unmap = brw_miptree_unmap_map; -} - -static void -brw_miptree_unmap_blit(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - brw_miptree_unmap_raw(map->linear_mt); - - if (map->mode & GL_MAP_WRITE_BIT) { - if (devinfo->ver >= 6) { - brw_blorp_copy_miptrees(brw, map->linear_mt, 0, 0, - mt, level, slice, - 0, 0, map->x, map->y, map->w, map->h); - } else { - bool ok = brw_miptree_copy(brw, - map->linear_mt, 0, 0, 0, 0, - mt, level, slice, map->x, map->y, - map->w, map->h); - WARN_ONCE(!ok, "Failed to blit from linear temporary mapping"); - } - } - - brw_miptree_release(&map->linear_mt); -} - -/* Compute extent parameters for use with tiled_memcpy functions. - * xs are in units of bytes and ys are in units of strides. 
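A worked example with assumed values: mapping a 16x8 texel window at (8, 4) of a compressed format with 4x4 blocks and 8 bytes per block (mt->cpp counts bytes per block for compressed formats), with the slice at element offset (0, 0), yields x1_B = (8/4) * 8 = 16, y1_el = 4/4 = 1, x2_B = DIV_ROUND_UP(24, 4) * 8 = 48, and y2_el = DIV_ROUND_UP(12, 4) = 3.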
- */ -static inline void -tile_extents(struct brw_mipmap_tree *mt, struct brw_miptree_map *map, - unsigned int level, unsigned int slice, unsigned int *x1_B, - unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) -{ - unsigned int block_width, block_height; - unsigned int x0_el, y0_el; - - _mesa_get_format_block_size(mt->format, &block_width, &block_height); - - assert(map->x % block_width == 0); - assert(map->y % block_height == 0); - - brw_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); - *x1_B = (map->x / block_width + x0_el) * mt->cpp; - *y1_el = map->y / block_height + y0_el; - *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; - *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; -} - -static void -brw_miptree_unmap_tiled_memcpy(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (map->mode & GL_MAP_WRITE_BIT) { - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - - char *dst = brw_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - dst += mt->offset; - - isl_memcpy_linear_to_tiled( - x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, map->stride, - devinfo->has_bit6_swizzle, mt->surf.tiling, ISL_MEMCPY); - - brw_miptree_unmap_raw(mt); - } - align_free(map->buffer); - map->buffer = map->ptr = NULL; -} - -/** - * Determine which copy function to use for the given format combination - * - * The only two possible copy functions which are ever returned are a - * direct memcpy and an RGBA <-> BGRA copy function. Since RGBA -> BGRA and - * BGRA -> RGBA are exactly the same operation (and memcpy is obviously - * symmetric), it doesn't matter whether the copy is from the tiled image - * to the untiled or vice versa. The copy function required is the same in - * either case, so this function can be used for both directions. - * - * \param[in] tiledFormat The format of the tiled image - * \param[in] format The GL format of the client data - * \param[in] type The GL type of the client data - * \param[out] cpp Number of bytes per pixel - * - * \return the isl_memcpy_type to use for the copy, or ISL_MEMCPY_INVALID - * if the format and type combination is invalid - */ -isl_memcpy_type -brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - uint32_t *cpp) -{ - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return ISL_MEMCPY_INVALID; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - return ISL_MEMCPY; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - return ISL_MEMCPY; - } else if (format == GL_RGBA) { - return ISL_MEMCPY_BGRA8; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA, so we can - * use the same function.
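For instance (hypothetical calls): reading an R8G8B8A8_UNORM miptree back as GL_BGRA selects ISL_MEMCPY_BGRA8, exactly as uploading GL_RGBA client data into a B8G8R8A8_UNORM miptree does, because the red/blue channel swap is its own inverse.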
- */ - return ISL_MEMCPY_BGRA8; - } else if (format == GL_RGBA) { - return ISL_MEMCPY; - } - } - - return ISL_MEMCPY_INVALID; -} - -static void -brw_miptree_map_tiled_memcpy(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16); - - /* The tiling and detiling functions require that the linear buffer - * has proper 16-byte alignment (that is, its `x0` is 16-byte - * aligned). Here we over-allocate the linear buffer by enough - * bytes to get the proper alignment. - */ - map->buffer = align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16); - map->ptr = (char *)map->buffer + (x1 & 0xf); - assert(map->buffer); - - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - char *src = brw_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - src += mt->offset; - - const isl_memcpy_type copy_type = -#if defined(USE_SSE41) - cpu_has_sse4_1 ? ISL_MEMCPY_STREAMING_LOAD : -#endif - ISL_MEMCPY; - - isl_memcpy_tiled_to_linear( - x1, x2, y1, y2, map->ptr, src, map->stride, - mt->surf.row_pitch_B, devinfo->has_bit6_swizzle, mt->surf.tiling, - copy_type); - - brw_miptree_unmap_raw(mt); - } - - map->unmap = brw_miptree_unmap_tiled_memcpy; -} - -static void -brw_miptree_map_blit(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - map->linear_mt = make_surface(brw, GL_TEXTURE_2D, mt->format, - 0, 0, map->w, map->h, 1, 1, - ISL_TILING_LINEAR_BIT, - ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_TEXTURE_BIT, - 0, 0, NULL); - - if (!map->linear_mt) { - fprintf(stderr, "Failed to allocate blit temporary\n"); - goto fail; - } - map->stride = map->linear_mt->surf.row_pitch_B; - - /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no - * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless - * invalidate is set, since we'll be writing the whole rectangle from our - * temporary buffer back out. - */ - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - if (devinfo->ver >= 6) { - brw_blorp_copy_miptrees(brw, mt, level, slice, - map->linear_mt, 0, 0, - map->x, map->y, 0, 0, map->w, map->h); - } else { - if (!brw_miptree_copy(brw, - mt, level, slice, map->x, map->y, - map->linear_mt, 0, 0, 0, 0, - map->w, map->h)) { - fprintf(stderr, "Failed to blit\n"); - goto fail; - } - } - } - - map->ptr = brw_miptree_map_raw(brw, map->linear_mt, map->mode); - - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, _mesa_get_format_name(mt->format), - level, slice, map->ptr, map->stride); - - map->unmap = brw_miptree_unmap_blit; - return; - -fail: - brw_miptree_release(&map->linear_mt); - map->ptr = NULL; - map->stride = 0; -} - -/** - * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA. 
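As a sketch of the alignment trick used below, with assumed numbers: if row_pitch_B is a multiple of 16 and the mapped window's first byte sits 12 bytes past a 16-byte boundary, then every source row is misaligned by the same 12 bytes; offsetting map->ptr by those same 12 bytes gives source and destination rows identical alignment, which is what the streaming copy relies on.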
- */ -#if defined(USE_SSE41) -static void -brw_miptree_unmap_movntdqa(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - align_free(map->buffer); - map->buffer = NULL; - map->ptr = NULL; -} - -static void -brw_miptree_map_movntdqa(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - assert(map->mode & GL_MAP_READ_BIT); - assert(!(map->mode & GL_MAP_WRITE_BIT)); - - brw_miptree_access_raw(brw, mt, level, slice, false); - - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, _mesa_get_format_name(mt->format), - level, slice, map->ptr, map->stride); - - /* Map the original image */ - uint32_t image_x; - uint32_t image_y; - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - image_x += map->x; - image_y += map->y; - - void *src = brw_miptree_map_raw(brw, mt, map->mode); - if (!src) - return; - - src += mt->offset; - - src += image_y * mt->surf.row_pitch_B; - src += image_x * mt->cpp; - - /* Due to the pixel offsets for the particular image being mapped, our - * src pointer may not be 16-byte aligned. However, if the pitch is - * divisible by 16, then the amount by which it's misaligned will remain - * consistent from row to row. - */ - assert((mt->surf.row_pitch_B % 16) == 0); - const int misalignment = ((uintptr_t) src) & 15; - - /* Create an untiled temporary buffer for the mapping. */ - const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w); - - map->stride = ALIGN(misalignment + width_bytes, 16); - - map->buffer = align_malloc(map->stride * map->h, 16); - /* Offset the destination so it has the same misalignment as src. */ - map->ptr = map->buffer + misalignment; - - assert((((uintptr_t) map->ptr) & 15) == misalignment); - - for (uint32_t y = 0; y < map->h; y++) { - void *dst_ptr = map->ptr + y * map->stride; - void *src_ptr = src + y * mt->surf.row_pitch_B; - - _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes); - } - - brw_miptree_unmap_raw(mt); - - map->unmap = brw_miptree_unmap_movntdqa; -} -#endif - -static void -brw_miptree_unmap_s8(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (map->mode & GL_MAP_WRITE_BIT) { - unsigned int image_x, image_y; - uint8_t *untiled_s8_map = map->ptr; - uint8_t *tiled_s8_map = brw_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT); - - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - ptrdiff_t offset = brw_offset_S8(mt->surf.row_pitch_B, - image_x + x + map->x, - image_y + y + map->y, - devinfo->has_bit6_swizzle); - tiled_s8_map[offset] = untiled_s8_map[y * map->w + x]; - } - } - - brw_miptree_unmap_raw(mt); - } - - free(map->buffer); -} - -static void -brw_miptree_map_s8(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - map->stride = map->w; - map->buffer = map->ptr = malloc(map->stride * map->h); - if (!map->buffer) - return; - - brw_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - /* One of either READ_BIT or WRITE_BIT or both is set. 
READ_BIT implies no - * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless - * invalidate is set, since we'll be writing the whole rectangle from our - * temporary buffer back out. - */ - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - uint8_t *untiled_s8_map = map->ptr; - uint8_t *tiled_s8_map = brw_miptree_map_raw(brw, mt, GL_MAP_READ_BIT); - unsigned int image_x, image_y; - - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - ptrdiff_t offset = brw_offset_S8(mt->surf.row_pitch_B, - x + image_x + map->x, - y + image_y + map->y, - devinfo->has_bit6_swizzle); - untiled_s8_map[y * map->w + x] = tiled_s8_map[offset]; - } - } - - brw_miptree_unmap_raw(mt); - - DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, map->x + image_x, map->y + image_y, map->ptr, map->stride); - } else { - DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, map->ptr, map->stride); - } - - map->unmap = brw_miptree_unmap_s8; -} - -/** - * Mapping functions for packed depth/stencil miptrees backed by real separate - * miptrees for depth and stencil. - * - * On gfx7, and to support HiZ pre-gfx7, we have to have the stencil buffer - * separate from the depth buffer. Yet at the GL API level, we have to expose - * packed depth/stencil textures and FBO attachments, and Mesa core expects to - * be able to map that memory for texture storage and glReadPixels-type - * operations. We give Mesa core that access by mallocing a temporary and - * copying the data between the actual backing store and the temporary. - */ -static void -brw_miptree_unmap_depthstencil(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_mipmap_tree *z_mt = mt; - struct brw_mipmap_tree *s_mt = mt->stencil_mt; - bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32; - - if (map->mode & GL_MAP_WRITE_BIT) { - uint32_t *packed_map = map->ptr; - uint8_t *s_map = brw_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT); - uint32_t *z_map = brw_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT); - unsigned int s_image_x, s_image_y; - unsigned int z_image_x, z_image_y; - - brw_miptree_get_image_offset(s_mt, level, slice, - &s_image_x, &s_image_y); - brw_miptree_get_image_offset(z_mt, level, slice, - &z_image_x, &z_image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - ptrdiff_t s_offset = brw_offset_S8(s_mt->surf.row_pitch_B, - x + s_image_x + map->x, - y + s_image_y + map->y, - devinfo->has_bit6_swizzle); - ptrdiff_t z_offset = ((y + z_image_y + map->y) * - (z_mt->surf.row_pitch_B / 4) + - (x + z_image_x + map->x)); - - if (map_z32f_x24s8) { - z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0]; - s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1]; - } else { - uint32_t packed = packed_map[y * map->w + x]; - s_map[s_offset] = packed >> 24; - z_map[z_offset] = packed; - } - } - } - - brw_miptree_unmap_raw(s_mt); - brw_miptree_unmap_raw(z_mt); - - DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", - __func__, - map->x, map->y, map->w, map->h, - z_mt, _mesa_get_format_name(z_mt->format), - map->x + z_image_x, map->y + z_image_y, - s_mt, map->x + s_image_x, map->y + s_image_y, - map->ptr, map->stride); - } - - free(map->buffer); -} 
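Both directions of the depth/stencil interleave above reduce to one byte of stencil in the top bits and 24 bits of depth below it. A minimal standalone sketch of that packing (hypothetical helpers, not part of the driver):

#include <stdint.h>

static inline uint32_t
pack_z24s8(uint32_t z24, uint8_t s)
{
   /* Stencil lives in bits 31:24, depth in bits 23:0. */
   return ((uint32_t)s << 24) | (z24 & 0x00ffffff);
}

static inline void
unpack_z24s8(uint32_t packed, uint32_t *z24, uint8_t *s)
{
   *s = packed >> 24;
   *z24 = packed & 0x00ffffff;
}

The MESA_FORMAT_Z_FLOAT32 path sidesteps this packing by keeping depth and stencil in two separate 32-bit slots of the temporary buffer.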
- -static void -brw_miptree_map_depthstencil(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_mipmap_tree *z_mt = mt; - struct brw_mipmap_tree *s_mt = mt->stencil_mt; - bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32; - int packed_bpp = map_z32f_x24s8 ? 8 : 4; - - map->stride = map->w * packed_bpp; - map->buffer = map->ptr = malloc(map->stride * map->h); - if (!map->buffer) - return; - - brw_miptree_access_raw(brw, z_mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - brw_miptree_access_raw(brw, s_mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no - * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless - * invalidate is set, since we'll be writing the whole rectangle from our - * temporary buffer back out. - */ - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - uint32_t *packed_map = map->ptr; - uint8_t *s_map = brw_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT); - uint32_t *z_map = brw_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT); - unsigned int s_image_x, s_image_y; - unsigned int z_image_x, z_image_y; - - brw_miptree_get_image_offset(s_mt, level, slice, - &s_image_x, &s_image_y); - brw_miptree_get_image_offset(z_mt, level, slice, - &z_image_x, &z_image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - int map_x = map->x + x, map_y = map->y + y; - ptrdiff_t s_offset = brw_offset_S8(s_mt->surf.row_pitch_B, - map_x + s_image_x, - map_y + s_image_y, - devinfo->has_bit6_swizzle); - ptrdiff_t z_offset = ((map_y + z_image_y) * - (z_mt->surf.row_pitch_B / 4) + - (map_x + z_image_x)); - uint8_t s = s_map[s_offset]; - uint32_t z = z_map[z_offset]; - - if (map_z32f_x24s8) { - packed_map[(y * map->w + x) * 2 + 0] = z; - packed_map[(y * map->w + x) * 2 + 1] = s; - } else { - packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff); - } - } - } - - brw_miptree_unmap_raw(s_mt); - brw_miptree_unmap_raw(z_mt); - - DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", - __func__, - map->x, map->y, map->w, map->h, - z_mt, map->x + z_image_x, map->y + z_image_y, - s_mt, map->x + s_image_x, map->y + s_image_y, - map->ptr, map->stride); - } else { - DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, map->ptr, map->stride); - } - - map->unmap = brw_miptree_unmap_depthstencil; -} - -/** - * Create and attach a map to the miptree at (level, slice). Return the - * attached map. - */ -static struct brw_miptree_map* -brw_miptree_attach_map(struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h, - GLbitfield mode) -{ - struct brw_miptree_map *map = calloc(1, sizeof(*map)); - - if (!map) - return NULL; - - assert(mt->level[level].slice[slice].map == NULL); - mt->level[level].slice[slice].map = map; - - map->mode = mode; - map->x = x; - map->y = y; - map->w = w; - map->h = h; - - return map; -} - -/** - * Release the map at (level, slice). 
- */ -static void -brw_miptree_release_map(struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice) -{ - struct brw_miptree_map **map; - - map = &mt->level[level].slice[slice].map; - free(*map); - *map = NULL; -} - -static bool -can_blit_slice(struct brw_mipmap_tree *mt, - const struct brw_miptree_map *map) -{ - /* See brw_miptree_blit() for details on the 32k pitch limit. */ - const unsigned src_blt_pitch = brw_miptree_blt_pitch(mt); - const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64); - return src_blt_pitch < 32768 && dst_blt_pitch < 32768; -} - -static bool -use_blitter_to_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - const struct brw_miptree_map *map) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->has_llc && - /* It's probably not worth swapping to the blit ring because of - * all the overhead involved. - */ - !(map->mode & GL_MAP_WRITE_BIT) && - !mt->compressed && - (mt->surf.tiling == ISL_TILING_X || - /* Prior to Sandybridge, the blitter can't handle Y tiling */ - (devinfo->ver >= 6 && mt->surf.tiling == ISL_TILING_Y0) || - /* Fast copy blit on skl+ supports all tiling formats. */ - devinfo->ver >= 9) && - can_blit_slice(mt, map)) - return true; - - if (mt->surf.tiling != ISL_TILING_LINEAR && - mt->bo->size >= brw->max_gtt_map_object_size) { - assert(can_blit_slice(mt, map)); - return true; - } - - return false; -} - -/** - * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may - * exceed 32 bits but to diminish the likelihood of subtle bugs caused by - * overflow in pointer arithmetic. - * - * If you call this function and use \a out_stride, then you're doing pointer - * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all - * bugs. The caller must still take care to avoid 32-bit overflow errors in - * all arithmetic expressions that contain buffer offsets and pixel sizes, - * which usually have type uint32_t or GLuint.
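A concrete overflow case with assumed sizes: a 16384-texel-wide RGBA32F surface has a row stride of 16384 * 16 = 262144 bytes, and computing y * stride at y = 16384 in 32-bit arithmetic yields exactly 2^32, which wraps to 0; carrying the stride as ptrdiff_t keeps that multiply in pointer-width math.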
- */ -void -brw_miptree_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h, - GLbitfield mode, - void **out_ptr, - ptrdiff_t *out_stride) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_miptree_map *map; - - assert(mt->surf.samples == 1); - - map = brw_miptree_attach_map(mt, level, slice, x, y, w, h, mode); - if (!map){ - *out_ptr = NULL; - *out_stride = 0; - return; - } - - if (mt->format == MESA_FORMAT_S_UINT8) { - brw_miptree_map_s8(brw, mt, map, level, slice); - } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { - brw_miptree_map_depthstencil(brw, mt, map, level, slice); - } else if (use_blitter_to_map(brw, mt, map)) { - brw_miptree_map_blit(brw, mt, map, level, slice); - } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->ver > 4) { - brw_miptree_map_tiled_memcpy(brw, mt, map, level, slice); -#if defined(USE_SSE41) - } else if (!(mode & GL_MAP_WRITE_BIT) && - !mt->compressed && cpu_has_sse4_1 && - (mt->surf.row_pitch_B % 16 == 0)) { - brw_miptree_map_movntdqa(brw, mt, map, level, slice); -#endif - } else { - if (mt->surf.tiling != ISL_TILING_LINEAR) - perf_debug("brw_miptree_map: mapping via gtt"); - brw_miptree_map_map(brw, mt, map, level, slice); - } - - *out_ptr = map->ptr; - *out_stride = map->stride; - - if (map->ptr == NULL) - brw_miptree_release_map(mt, level, slice); -} - -void -brw_miptree_unmap(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice) -{ - struct brw_miptree_map *map = mt->level[level].slice[slice].map; - - assert(mt->surf.samples == 1); - - if (!map) - return; - - DBG("%s: mt %p (%s) level %d slice %d\n", __func__, - mt, _mesa_get_format_name(mt->format), level, slice); - - if (map->unmap) - map->unmap(brw, mt, map, level, slice); - - brw_miptree_release_map(mt, level, slice); -} - -enum isl_surf_dim -get_isl_surf_dim(GLenum target) -{ - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - return ISL_SURF_DIM_1D; - - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_EXTERNAL_OES: - return ISL_SURF_DIM_2D; - - case GL_TEXTURE_3D: - return ISL_SURF_DIM_3D; - } - - unreachable("Invalid texture target"); -} - -enum isl_dim_layout -get_isl_dim_layout(const struct intel_device_info *devinfo, - enum isl_tiling tiling, GLenum target) -{ - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - return (devinfo->ver >= 9 && tiling == ISL_TILING_LINEAR ? - ISL_DIM_LAYOUT_GFX9_1D : ISL_DIM_LAYOUT_GFX4_2D); - - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_EXTERNAL_OES: - return ISL_DIM_LAYOUT_GFX4_2D; - - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - return (devinfo->ver == 4 ? ISL_DIM_LAYOUT_GFX4_3D : - ISL_DIM_LAYOUT_GFX4_2D); - - case GL_TEXTURE_3D: - return (devinfo->ver >= 9 ? 
- ISL_DIM_LAYOUT_GFX4_2D : ISL_DIM_LAYOUT_GFX4_3D); - } - - unreachable("Invalid texture target"); -} - -bool -brw_miptree_set_clear_color(struct brw_context *brw, - struct brw_mipmap_tree *mt, - union isl_color_value clear_color) -{ - if (memcmp(&mt->fast_clear_color, &clear_color, sizeof(clear_color)) != 0) { - mt->fast_clear_color = clear_color; - if (mt->aux_buf->clear_color_bo) { - /* We can't update the clear color while the hardware is still using - * the previous one for a resolve or sampling from it. Make sure that - * there are no pending commands at this point. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); - for (int i = 0; i < 4; i++) { - brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo, - mt->aux_buf->clear_color_offset + i * 4, - mt->fast_clear_color.u32[i]); - } - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); - } - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - return true; - } - return false; -} - -union isl_color_value -brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt, - struct brw_bo **clear_color_bo, - uint64_t *clear_color_offset) -{ - assert(mt->aux_buf); - - *clear_color_bo = mt->aux_buf->clear_color_bo; - *clear_color_offset = mt->aux_buf->clear_color_offset; - return mt->fast_clear_color; -} - -static void -brw_miptree_update_etc_shadow(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - int level_w, - int level_h) -{ - ptrdiff_t etc_stride, shadow_stride; - void *mptr, *sptr; - struct brw_mipmap_tree *smt = mt->shadow_mt; - - assert(brw_miptree_has_etc_shadow(brw, mt)); - - brw_miptree_map(brw, mt, level, slice, 0, 0, level_w, level_h, - GL_MAP_READ_BIT, &mptr, &etc_stride); - brw_miptree_map(brw, smt, level, slice, 0, 0, level_w, level_h, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, - &sptr, &shadow_stride); - - if (mt->format == MESA_FORMAT_ETC1_RGB8) { - _mesa_etc1_unpack_rgba8888(sptr, shadow_stride, mptr, etc_stride, - level_w, level_h); - } else { - /* destination and source images must have the same swizzle */ - bool is_bgra = (smt->format == MESA_FORMAT_B8G8R8A8_SRGB); - _mesa_unpack_etc2_format(sptr, shadow_stride, mptr, etc_stride, - level_w, level_h, mt->format, is_bgra); - } - - brw_miptree_unmap(brw, mt, level, slice); - brw_miptree_unmap(brw, smt, level, slice); -} - -void -brw_miptree_update_etc_shadow_levels(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - struct brw_mipmap_tree *smt; - int num_slices; - - assert(mt); - assert(mt->surf.size_B > 0); - assert(brw_miptree_has_etc_shadow(brw, mt)); - - smt = mt->shadow_mt; - num_slices = smt->surf.logical_level0_px.array_len; - - for (int level = smt->first_level; level <= smt->last_level; level++) { - int level_w = minify(smt->surf.logical_level0_px.width, - level - smt->first_level); - int level_h = minify(smt->surf.logical_level0_px.height, - level - smt->first_level); - - for (unsigned int slice = 0; slice < num_slices; slice++) { - brw_miptree_update_etc_shadow(brw, mt, level, slice, level_w, - level_h); - } - } - - mt->shadow_needs_update = false; -} diff --git a/src/mesa/drivers/dri/i965/brw_mipmap_tree.h b/src/mesa/drivers/dri/i965/brw_mipmap_tree.h deleted file mode 100644 index 956163b..0000000 --- a/src/mesa/drivers/dri/i965/brw_mipmap_tree.h +++ /dev/null @@ -1,741 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** @file brw_mipmap_tree.h - * - * This file defines the structure that wraps a BO and describes how the - * mipmap levels and slices of a texture are laid out. - * - * The hardware has a fixed layout of a texture depending on parameters such - * as the target/type (2D, 3D, CUBE), width, height, pitch, and number of - * mipmap levels. The individual level/layer slices are each 2D rectangles of - * pixels at some x/y offset from the start of the brw_bo. - * - * Original OpenGL allowed texture miplevels to be specified in arbitrary - * order, and a texture may change size over time. Thus, each - * brw_texture_image has a reference to a miptree that contains the pixel - * data sized appropriately for it, which will later be referenced by/copied - * to the brw_texture_object at draw time (brw_finalize_mipmap_tree()) so - * that there's a single miptree for the complete texture. - */ - -#ifndef BRW_MIPMAP_TREE_H -#define BRW_MIPMAP_TREE_H - -#include <assert.h> - -#include "main/mtypes.h" -#include "isl/isl.h" -#include "blorp/blorp.h" -#include "brw_bufmgr.h" -#include "brw_context.h" -#include <GL/internal/dri_interface.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_context; -struct brw_renderbuffer; - -struct brw_texture_image; - -/** - * This bit extends the set of GL_MAP_*_BIT enums. - * - * When calling brw_miptree_map() on an ETC-transcoded-to-RGB miptree or a - * depthstencil-split-to-separate-stencil miptree, we'll normally make a - * temporary and recreate the kind of data requested by Mesa core, since we're - * satisfying some glGetTexImage() request or something. - * - * However, occasionally you want to actually map the miptree's current data - * without transcoding back. This flag to brw_miptree_map() gets you that. - */ -#define BRW_MAP_DIRECT_BIT 0x80000000 - -struct brw_miptree_map { - /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */ - GLbitfield mode; - /** Region of interest for the map. */ - int x, y, w, h; - /** Possibly malloced temporary buffer for the mapping. */ - void *buffer; - /** Possible pointer to a temporary linear miptree for the mapping. */ - struct brw_mipmap_tree *linear_mt; - /** Pointer to the start of (map_x, map_y) returned by the mapping. */ - void *ptr; - /** Stride of the mapping.
*/ - int stride; - - void (*unmap)(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice); -}; - -/** - * Describes the location of each texture image within a miptree. - */ -struct brw_mipmap_level -{ - /** Offset to this miptree level, used in computing x_offset. */ - GLuint level_x; - /** Offset to this miptree level, used in computing y_offset. */ - GLuint level_y; - - /** - * \brief Is HiZ enabled for this level? - * - * If \c mt->level[l].has_hiz is set, then (1) \c mt->hiz_mt has been - * allocated and (2) the HiZ memory for the slices in this level reside at - * \c mt->hiz_mt->level[l]. - */ - bool has_hiz; - - /** - * \brief List of 2D images in this mipmap level. - * - * This may be a list of cube faces, array slices in 2D array texture, or - * layers in a 3D texture. The list's length is \c depth. - */ - struct brw_mipmap_slice { - /** - * Mapping information. Persistent for the duration of - * brw_miptree_map/unmap on this slice. - */ - struct brw_miptree_map *map; - } *slice; -}; - -/** - * Miptree aux buffer. These buffers are associated with a miptree, but the - * format is managed by the hardware. - * - * For Gfx7+, we always give the hardware the start of the buffer, and let it - * handle all accesses to the buffer. Therefore we don't need the full miptree - * layout structure for this buffer. - */ -struct brw_miptree_aux_buffer -{ - struct isl_surf surf; - - /** - * Buffer object containing the pixel data. - * - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress - */ - struct brw_bo *bo; - - /** - * Offset into bo where the surface starts. - * - * @see brw_mipmap_aux_buffer::bo - * - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress - */ - uint32_t offset; - - /** - * Buffer object containing the indirect clear color. - * - * @see create_ccs_buf_for_image - * @see RENDER_SURFACE_STATE.ClearValueAddress - */ - struct brw_bo *clear_color_bo; - - /** - * Offset into bo where the clear color can be found. - * - * @see create_ccs_buf_for_image - * @see RENDER_SURFACE_STATE.ClearValueAddress - */ - uint32_t clear_color_offset; -}; - -struct brw_mipmap_tree -{ - struct isl_surf surf; - - /** - * Buffer object containing the surface. - * - * @see brw_mipmap_tree::offset - * @see RENDER_SURFACE_STATE.SurfaceBaseAddress - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress - */ - struct brw_bo *bo; - - /** - * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc. - * - * @see RENDER_SURFACE_STATE.SurfaceType - * @see RENDER_SURFACE_STATE.SurfaceArray - * @see 3DSTATE_DEPTH_BUFFER.SurfaceType - */ - GLenum target; - - /** - * Generally, this is just the same as the gl_texture_image->TexFormat or - * gl_renderbuffer->Format. - * - * However, for textures and renderbuffers with packed depth/stencil formats - * on hardware where we want or need to use separate stencil, there will be - * two miptrees for storing the data. 
If the depthstencil texture or rb is - * MESA_FORMAT_Z32_FLOAT_S8X24_UINT, then mt->format will be - * MESA_FORMAT_Z_FLOAT32, otherwise for MESA_FORMAT_Z24_UNORM_S8_UINT objects it will be - * MESA_FORMAT_Z24_UNORM_X8_UINT. - * - * @see RENDER_SURFACE_STATE.SurfaceFormat - * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat - */ - mesa_format format; - - GLuint first_level; - GLuint last_level; - - /** Bytes per pixel (or bytes per block if compressed) */ - GLuint cpp; - - bool compressed; - - /* Includes image offset tables: */ - struct brw_mipmap_level level[MAX_TEXTURE_LEVELS]; - - /** - * Offset into bo where the surface starts. - * - * @see brw_mipmap_tree::bo - * - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress - */ - uint32_t offset; - - /** - * \brief The type of auxiliary compression used by this miptree. - * - * This describes the type of auxiliary compression that is intended to be - * used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that - * auxiliary compression is permanently disabled. An aux usage other than - * ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has actually - * been allocated nor does it imply that auxiliary compression will always - * be enabled for this surface. For instance, with CCS_D, we may allocate - * the CCS on-the-fly and it may not be used for texturing if the miptree - * is fully resolved. - */ - enum isl_aux_usage aux_usage; - - /** - * \brief Whether or not this miptree supports fast clears. - */ - bool supports_fast_clear; - - /** - * \brief Maps miptree slices to their current aux state - * - * This two-dimensional array is indexed as [level][layer] and stores an - * aux state for each slice. - */ - enum isl_aux_state **aux_state; - - /** - * \brief Stencil miptree for depthstencil textures. - * - * This miptree is used for depthstencil textures and renderbuffers that - * require separate stencil. It always has the true copy of the stencil - * bits, regardless of mt->format. - * - * \see 3DSTATE_STENCIL_BUFFER - * \see brw_miptree_map_depthstencil() - * \see brw_miptree_unmap_depthstencil() - */ - struct brw_mipmap_tree *stencil_mt; - - /** - * \brief Shadow miptree for sampling when the main isn't supported by HW. - * - * To workaround various sampler bugs and limitations, we blit the main - * texture into a new texture that can be sampled. - * - * This miptree may be used for: - * - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing. - * - To store the decompressed ETC/EAC data in case we emulate the ETC - * compression on Gen 7 or earlier GPUs. - */ - struct brw_mipmap_tree *shadow_mt; - bool shadow_needs_update; - - /** - * \brief CCS, MCS, or HiZ auxiliary buffer. - * - * NULL if no auxiliary buffer is in use for this surface. - * - * For single-sampled color miptrees: - * This buffer contains the Color Control Surface, which stores the - * necessary information to implement lossless color compression (CCS_E) - * and "fast color clear" (CCS_D) behaviour. - * - * For multi-sampled color miptrees: - * This buffer contains the Multisample Control Surface, which stores the - * necessary information to implement compressed MSAA - * (INTEL_MSAA_FORMAT_CMS). 
- * - * For depth miptrees: - * This buffer contains the Hierarchical Depth Buffer, which stores the - * necessary information to implement lossless depth compression and fast - * depth clear behavior. - * - * To determine if HiZ is enabled, do not check this pointer. Instead, - * use brw_miptree_level_has_hiz(). - */ - struct brw_miptree_aux_buffer *aux_buf; - - /** - * Planes 1 and 2 in case this is a planar surface. - */ - struct brw_mipmap_tree *plane[2]; - - /** - * Fast clear color for this surface. For depth surfaces, the clear value - * is stored as a float32 in the red component. - */ - union isl_color_value fast_clear_color; - - /** - * For external surfaces, this is DRM format modifier that was used to - * create or import the surface. For internal surfaces, this will always - * be DRM_FORMAT_MOD_INVALID. - */ - uint64_t drm_modifier; - - /* These are also refcounted: - */ - GLuint refcount; -}; - -bool -brw_miptree_alloc_aux(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -enum brw_miptree_create_flags { - /** No miptree create flags */ - MIPTREE_CREATE_DEFAULT = 0, - - /** Miptree creation should try to allocate a currently busy BO - * - * This may be advantageous if we know the next thing to touch the BO will - * be the GPU because the BO will likely already be in the GTT and maybe - * even in some caches. If there is a chance that the next thing to touch - * the miptree BO will be the CPU, this flag should not be set. - */ - MIPTREE_CREATE_BUSY = 1 << 0, - - /** Create the miptree with auxiliary compression disabled - * - * This does not prevent the caller of brw_miptree_create from coming - * along later and turning auxiliary compression back on but it does mean - * that the miptree will be created with mt->aux_usage == NONE. - */ - MIPTREE_CREATE_NO_AUX = 1 << 1, -}; - -struct brw_mipmap_tree *brw_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - enum brw_miptree_create_flags flags); - -struct brw_mipmap_tree * -brw_miptree_create_for_bo(struct brw_context *brw, - struct brw_bo *bo, - mesa_format format, - uint32_t offset, - uint32_t width, - uint32_t height, - uint32_t depth, - int pitch, - enum isl_tiling tiling, - enum brw_miptree_create_flags flags); - -struct brw_mipmap_tree * -brw_miptree_create_for_dri_image(struct brw_context *brw, - __DRIimage *image, - GLenum target, - mesa_format format, - bool allow_internal_aux); - -bool -brw_update_winsys_renderbuffer_miptree(struct brw_context *intel, - struct brw_renderbuffer *irb, - struct brw_mipmap_tree *singlesample_mt, - uint32_t width, uint32_t height, - uint32_t pitch); - -/** - * Create a miptree appropriate as the storage for a non-texture renderbuffer. - * The miptree has the following properties: - * - The target is GL_TEXTURE_2D. - * - There are no levels other than the base level 0. - * - Depth is 1. - */ -struct brw_mipmap_tree* -brw_miptree_create_for_renderbuffer(struct brw_context *brw, - mesa_format format, - uint32_t width, - uint32_t height, - uint32_t num_samples); - -mesa_format -brw_depth_format_for_depthstencil_format(mesa_format format); - -mesa_format -brw_lower_compressed_format(struct brw_context *brw, mesa_format format); - -unsigned -brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level); - -/** \brief Assert that the level and layer are valid for the miptree. 
*/
-void
-brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt,
-                              uint32_t level,
-                              uint32_t layer);
-
-void brw_miptree_reference(struct brw_mipmap_tree **dst,
-                           struct brw_mipmap_tree *src);
-
-void brw_miptree_release(struct brw_mipmap_tree **mt);
-
-/* Check if an image fits an existing mipmap tree layout
- */
-bool brw_miptree_match_image(struct brw_mipmap_tree *mt,
-                             struct gl_texture_image *image);
-
-void
-brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt,
-                             GLuint level, GLuint slice,
-                             GLuint *x, GLuint *y);
-
-enum isl_surf_dim
-get_isl_surf_dim(GLenum target);
-
-enum isl_dim_layout
-get_isl_dim_layout(const struct intel_device_info *devinfo,
-                   enum isl_tiling tiling, GLenum target);
-
-void
-brw_get_image_dims(struct gl_texture_image *image,
-                   int *width, int *height, int *depth);
-
-uint32_t
-brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt,
-                             GLuint level, GLuint slice,
-                             uint32_t *tile_x,
-                             uint32_t *tile_y);
-uint32_t
-brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt,
-                               uint32_t x, uint32_t y);
-
-void
-brw_miptree_copy_slice(struct brw_context *brw,
-                       struct brw_mipmap_tree *src_mt,
-                       unsigned src_level, unsigned src_layer,
-                       struct brw_mipmap_tree *dst_mt,
-                       unsigned dst_level, unsigned dst_layer);
-
-void
-brw_miptree_copy_teximage(struct brw_context *brw,
-                          struct brw_texture_image *brw_image,
-                          struct brw_mipmap_tree *dst_mt);
-
-/**
- * \name Miptree HiZ functions
- * \{
- *
- * It is safe to call the "slice_set_need_resolve" and "slice_resolve"
- * functions on a miptree without HiZ. In that case, each function is a no-op.
- */
-
-bool
-brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level);
-
-/**\}*/
-
-bool
-brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt,
-                                 unsigned start_level, unsigned num_levels,
-                                 unsigned start_layer, unsigned num_layers);
-
-
-#define INTEL_REMAINING_LAYERS UINT32_MAX
-#define INTEL_REMAINING_LEVELS UINT32_MAX
-
-/** Prepare a miptree for access
- *
- * This function should be called prior to any access to a miptree in order
- * to perform any needed resolves.
- *
- * \param[in] start_level The first mip level to be accessed
- *
- * \param[in] num_levels The number of miplevels to be accessed or
- *                       INTEL_REMAINING_LEVELS to indicate every level
- *                       above start_level will be accessed
- *
- * \param[in] start_layer The first array slice or 3D layer to be accessed
- *
- * \param[in] num_layers The number of array slices or 3D layers to be
- *                       accessed or INTEL_REMAINING_LAYERS to indicate
- *                       every layer above start_layer will be accessed
- *
- * \param[in] aux_usage The auxiliary compression usage the access will be
- *                      made with; this must be ISL_AUX_USAGE_NONE for
- *                      uncompressed miptrees
- *
- * \param[in] fast_clear_supported Whether or not the access will support
- *                                 fast clears in the miptree's auxiliary
- *                                 compression format
- */
-void
-brw_miptree_prepare_access(struct brw_context *brw,
-                           struct brw_mipmap_tree *mt,
-                           uint32_t start_level, uint32_t num_levels,
-                           uint32_t start_layer, uint32_t num_layers,
-                           enum isl_aux_usage aux_usage,
-                           bool fast_clear_supported);
-
-/** Complete a write operation
- *
- * This function should be called after any operation that writes to a
- * miptree. This will update the miptree's compression state so that future
- * resolves happen correctly.
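- *
- * A minimal usage sketch of the prepare/finish pair (hypothetical caller;
- * the level/layer variables are illustrative only):
- *
- *    brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
- *                               mt->aux_usage, mt->supports_fast_clear);
- *    ... emit GPU commands that write the slice ...
- *    brw_miptree_finish_write(brw, mt, level, layer, 1, mt->aux_usage);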
- *
- * Technically, this function can be called before the write occurs, but the
- * caller must then ensure that they don't interleave
- * brw_miptree_prepare_access and brw_miptree_finish_write calls on
- * overlapping layer/level ranges.
- *
- * \param[in] level The mip level that was written
- *
- * \param[in] start_layer The first array slice or 3D layer written
- *
- * \param[in] num_layers The number of array slices or 3D layers
- *                       written or INTEL_REMAINING_LAYERS to indicate
- *                       every layer above start_layer was written
- *
- * \param[in] aux_usage The auxiliary compression usage the write was
- *                      performed with
- */
-void
-brw_miptree_finish_write(struct brw_context *brw,
-                         struct brw_mipmap_tree *mt, uint32_t level,
-                         uint32_t start_layer, uint32_t num_layers,
-                         enum isl_aux_usage aux_usage);
-
-/** Get the auxiliary compression state of a miptree slice */
-enum isl_aux_state
-brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt,
-                          uint32_t level, uint32_t layer);
-
-/** Set the auxiliary compression state of a miptree slice range
- *
- * This function directly sets the auxiliary compression state of a slice
- * range of a miptree. It only modifies data structures and does not do any
- * resolves. This should only be called by code which directly performs
- * compression operations such as fast clears and resolves. Most code should
- * use brw_miptree_prepare_access or brw_miptree_finish_write.
- */
-void
-brw_miptree_set_aux_state(struct brw_context *brw,
-                          struct brw_mipmap_tree *mt, uint32_t level,
-                          uint32_t start_layer, uint32_t num_layers,
-                          enum isl_aux_state aux_state);
-
-/**
- * Prepare a miptree for raw access
- *
- * This helper prepares the miptree for access that knows nothing about any
- * sort of compression whatsoever. This is useful when mapping the surface or
- * using it with the blitter.
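- *
- * For example, a hypothetical caller mapping a slice for CPU access might
- * do (names illustrative only):
- *
- *    brw_miptree_access_raw(brw, mt, level, slice, write);
- *    ... map mt->bo and read/write the pixels directly ...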
- */ -static inline void -brw_miptree_access_raw(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t level, uint32_t layer, - bool write) -{ - brw_miptree_prepare_access(brw, mt, level, 1, layer, 1, - ISL_AUX_USAGE_NONE, false); - if (write) - brw_miptree_finish_write(brw, mt, level, layer, 1, ISL_AUX_USAGE_NONE); -} - -enum isl_aux_usage -brw_miptree_texture_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits); -void -brw_miptree_prepare_texture(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits); -void -brw_miptree_prepare_image(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -enum isl_aux_usage -brw_miptree_render_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format render_format, - bool blend_enabled, - bool draw_aux_disabled); -void -brw_miptree_prepare_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage); -void -brw_miptree_finish_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage); -void -brw_miptree_prepare_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count); -void -brw_miptree_finish_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - bool depth_written); -void -brw_miptree_prepare_external(struct brw_context *brw, - struct brw_mipmap_tree *mt); -void -brw_miptree_finish_external(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -void -brw_miptree_make_shareable(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -void -brw_miptree_updownsample(struct brw_context *brw, - struct brw_mipmap_tree *src, - struct brw_mipmap_tree *dst); - -void -brw_update_r8stencil(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -void -brw_miptree_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h, - GLbitfield mode, - void **out_ptr, - ptrdiff_t *out_stride); - -void -brw_miptree_unmap(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice); - -bool -brw_miptree_sample_with_hiz(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -bool -brw_miptree_set_clear_color(struct brw_context *brw, - struct brw_mipmap_tree *mt, - union isl_color_value clear_color); - -/* Get a clear color suitable for filling out an ISL surface state. 
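- * (On hardware that reads the clear color indirectly, through
- * RENDER_SURFACE_STATE.ClearValueAddress, the returned bo/offset pair
- * names the buffer holding the value; see the clear_color_bo field of
- * brw_miptree_aux_buffer above.)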
*/ -union isl_color_value -brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt, - struct brw_bo **clear_color_bo, - uint64_t *clear_color_offset); - - -static inline int -brw_miptree_blt_pitch(struct brw_mipmap_tree *mt) -{ - int pitch = mt->surf.row_pitch_B; - if (mt->surf.tiling != ISL_TILING_LINEAR) - pitch /= 4; - return pitch; -} - -isl_memcpy_type -brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - uint32_t *cpp); - -static inline bool -brw_miptree_needs_fake_etc(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - bool is_etc = _mesa_is_format_etc2(mt->format) || - (mt->format == MESA_FORMAT_ETC1_RGB8); - - return devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT && is_etc; -} - -static inline bool -brw_miptree_has_etc_shadow(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - return brw_miptree_needs_fake_etc(brw, mt) && mt->shadow_mt; -} - -void -brw_miptree_update_etc_shadow_levels(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c deleted file mode 100644 index 00aa82f..0000000 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ /dev/null @@ -1,728 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_batch.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" - -#include "main/framebuffer.h" -#include "main/fbobject.h" -#include "main/format_utils.h" -#include "main/glformats.h" - -/** - * Upload pointers to the per-stage state. - * - * The state pointers in this packet are all relative to the general state - * base address set by CMD_STATE_BASE_ADDRESS, which is 0. - */ -static void -upload_pipelined_state_pointers(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver == 5) { - /* Need to flush before changing clip max threads for errata. 
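- * (Batch emission pattern used throughout this file: BEGIN_BATCH(n)
- * reserves n dwords in the batch buffer, OUT_BATCH/OUT_RELOC write them,
- * and ADVANCE_BATCH() verifies, in debug builds, that exactly n dwords
- * were emitted.)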
*/ - BEGIN_BATCH(1); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - } - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset); - if (brw->ff_gs.prog_active) - OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1); - else - OUT_BATCH(0); - OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1); - OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset); - OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset); - OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset); - ADVANCE_BATCH(); - - brw->ctx.NewDriverState |= BRW_NEW_PSP; -} - -static void -upload_psp_urb_cbs(struct brw_context *brw) -{ - upload_pipelined_state_pointers(brw); - brw_upload_urb_fence(brw); - brw_upload_cs_urb_state(brw); -} - -const struct brw_tracked_state brw_psp_urb_cbs = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FF_GS_PROG_DATA | - BRW_NEW_GFX4_UNIT_STATE | - BRW_NEW_STATE_BASE_ADDRESS | - BRW_NEW_URB_FENCE, - }, - .emit = upload_psp_urb_cbs, -}; - -uint32_t -brw_depthbuffer_format(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *drb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *srb; - - if (!drb && - (srb = brw_get_renderbuffer(fb, BUFFER_STENCIL)) && - !srb->mt->stencil_mt && - (brw_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT || - brw_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) { - drb = srb; - } - - if (!drb) - return BRW_DEPTHFORMAT_D32_FLOAT; - - return brw_depth_format(brw, drb->mt->format); -} - -static struct brw_mipmap_tree * -get_stencil_miptree(struct brw_renderbuffer *irb) -{ - if (!irb) - return NULL; - if (irb->mt->stencil_mt) - return irb->mt->stencil_mt; - return brw_renderbuffer_get_mt(irb); -} - -static bool -rebase_depth_stencil(struct brw_context *brw, struct brw_renderbuffer *irb, - bool invalidate) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - uint32_t tile_mask_x = 0, tile_mask_y = 0; - - isl_get_tile_masks(irb->mt->surf.tiling, irb->mt->cpp, - &tile_mask_x, &tile_mask_y); - assert(!brw_miptree_level_has_hiz(irb->mt, irb->mt_level)); - - uint32_t tile_x = irb->draw_x & tile_mask_x; - uint32_t tile_y = irb->draw_y & tile_mask_y; - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - */ - bool rebase = tile_x & 7 || tile_y & 7; - - /* We didn't even have intra-tile offsets before g45. */ - rebase |= (!devinfo->has_surface_tile_offset && (tile_x || tile_y)); - - if (rebase) { - perf_debug("HW workaround: blitting depth level %d to a temporary " - "to fix alignment (depth tile offset %d,%d)\n", - irb->mt_level, tile_x, tile_y); - brw_renderbuffer_move_to_temp(brw, irb, invalidate); - - /* There is now only single slice miptree. */ - brw->depthstencil.tile_x = 0; - brw->depthstencil.tile_y = 0; - brw->depthstencil.depth_offset = 0; - return true; - } - - /* While we just tried to get everything aligned, we may have failed to do - * so in the case of rendering to array or 3D textures, where nonzero faces - * will still have an offset post-rebase. At least give an informative - * warning. 
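- *
- * Worked example (hypothetical numbers): with X tiling and cpp=4 a tile
- * is 128x8 pixels, so tile_mask_x is 127 and tile_mask_y is 7. A draw_x
- * of 300 gives tile_x = 300 & 127 = 44, and 44 & 7 = 4 violates the
- * "3 LSBs must be zero" rule quoted above, so on this fallback path the
- * offsets get truncated below.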
- */ - WARN_ONCE((tile_x & 7) || (tile_y & 7), - "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" - "Truncating offset (%u:%u), bad rendering may occur.\n", - tile_x, tile_y); - tile_x &= ~7; - tile_y &= ~7; - - brw->depthstencil.tile_x = tile_x; - brw->depthstencil.tile_y = tile_y; - brw->depthstencil.depth_offset = brw_miptree_get_aligned_offset( - irb->mt, - irb->draw_x & ~tile_mask_x, - irb->draw_y & ~tile_mask_y); - - return false; -} - -void -brw_workaround_depthstencil_alignment(struct brw_context *brw, - GLbitfield clear_mask) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL); - struct brw_mipmap_tree *depth_mt = NULL; - bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH; - bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL; - - if (depth_irb) - depth_mt = depth_irb->mt; - - /* Initialize brw->depthstencil to 'nop' workaround state. - */ - brw->depthstencil.tile_x = 0; - brw->depthstencil.tile_y = 0; - brw->depthstencil.depth_offset = 0; - - /* Gfx6+ doesn't require the workarounds, since we always program the - * surface state at the start of the whole surface. - */ - if (devinfo->ver >= 6) - return; - - /* Check if depth buffer is in depth/stencil format. If so, then it's only - * safe to invalidate it if we're also clearing stencil. - */ - if (depth_irb && invalidate_depth && - _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL) - invalidate_depth = invalidate_stencil && stencil_irb; - - if (depth_irb) { - if (rebase_depth_stencil(brw, depth_irb, invalidate_depth)) { - /* In the case of stencil_irb being the same packed depth/stencil - * texture but not the same rb, make it point at our rebased mt, too. - */ - if (stencil_irb && - stencil_irb != depth_irb && - stencil_irb->mt == depth_mt) { - brw_miptree_reference(&stencil_irb->mt, depth_irb->mt); - brw_renderbuffer_set_draw_offset(stencil_irb); - } - } - - if (stencil_irb) { - assert(stencil_irb->mt == depth_irb->mt); - assert(stencil_irb->mt_level == depth_irb->mt_level); - assert(stencil_irb->mt_layer == depth_irb->mt_layer); - } - } - - /* If there is no depth attachment, consider if stencil needs rebase. */ - if (!depth_irb && stencil_irb) - rebase_depth_stencil(brw, stencil_irb, invalidate_stencil); -} - -static void -brw_emit_depth_stencil_hiz(struct brw_context *brw, - struct brw_renderbuffer *depth_irb, - struct brw_mipmap_tree *depth_mt, - struct brw_renderbuffer *stencil_irb, - struct brw_mipmap_tree *stencil_mt) -{ - uint32_t tile_x = brw->depthstencil.tile_x; - uint32_t tile_y = brw->depthstencil.tile_y; - uint32_t depth_surface_type = BRW_SURFACE_NULL; - uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT; - uint32_t depth_offset = 0; - uint32_t width = 1, height = 1; - bool tiled_surface = true; - - /* If there's a packed depth/stencil bound to stencil only, we need to - * emit the packed depth/stencil buffer packet. 
- */ - if (!depth_irb && stencil_irb) { - depth_irb = stencil_irb; - depth_mt = stencil_mt; - } - - if (depth_irb && depth_mt) { - depthbuffer_format = brw_depthbuffer_format(brw); - depth_surface_type = BRW_SURFACE_2D; - depth_offset = brw->depthstencil.depth_offset; - width = depth_irb->Base.Base.Width; - height = depth_irb->Base.Base.Height; - tiled_surface = depth_mt->surf.tiling != ISL_TILING_LINEAR; - } - - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const unsigned len = (devinfo->verx10 == 45 || devinfo->ver == 5) ? 6 : 5; - - BEGIN_BATCH(len); - OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); - OUT_BATCH((depth_mt ? depth_mt->surf.row_pitch_B - 1 : 0) | - (depthbuffer_format << 18) | - (BRW_TILEWALK_YMAJOR << 26) | - (tiled_surface << 27) | - (depth_surface_type << 29)); - - if (depth_mt) { - OUT_RELOC(depth_mt->bo, RELOC_WRITE, depth_offset); - } else { - OUT_BATCH(0); - } - - OUT_BATCH(((width + tile_x - 1) << 6) | - ((height + tile_y - 1) << 19)); - OUT_BATCH(0); - - if (devinfo->verx10 >= 45) - OUT_BATCH(tile_x | (tile_y << 16)); - else - assert(tile_x == 0 && tile_y == 0); - - if (devinfo->ver >= 6) - OUT_BATCH(0); - - ADVANCE_BATCH(); -} - -void -brw_emit_depthbuffer(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - /* _NEW_BUFFERS */ - struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL); - struct brw_mipmap_tree *depth_mt = brw_renderbuffer_get_mt(depth_irb); - struct brw_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb); - - if (depth_mt) - brw_cache_flush_for_depth(brw, depth_mt->bo); - if (stencil_mt) - brw_cache_flush_for_depth(brw, stencil_mt->bo); - - if (devinfo->ver < 6) { - brw_emit_depth_stencil_hiz(brw, depth_irb, depth_mt, - stencil_irb, stencil_mt); - return; - } - - /* Skip repeated NULL depth/stencil emits (think 2D rendering). */ - if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) { - assert(brw->hw_ctx); - return; - } - - brw_emit_depth_stall_flushes(brw); - - const unsigned ds_dwords = brw->isl_dev.ds.size / 4; - brw_batch_begin(brw, ds_dwords); - uint32_t *ds_map = brw->batch.map_next; - const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map; - - struct isl_view view = { - /* Some nice defaults */ - .base_level = 0, - .levels = 1, - .base_array_layer = 0, - .array_len = 1, - .swizzle = ISL_SWIZZLE_IDENTITY, - }; - - struct isl_depth_stencil_hiz_emit_info info = { - .view = &view, - .mocs = brw_mocs(&brw->isl_dev, NULL), - }; - - if (depth_mt) { - view.usage |= ISL_SURF_USAGE_DEPTH_BIT; - info.depth_surf = &depth_mt->surf; - - info.depth_address = - brw_batch_reloc(&brw->batch, - ds_offset + brw->isl_dev.ds.depth_offset, - depth_mt->bo, depth_mt->offset, RELOC_WRITE); - - info.mocs = brw_mocs(&brw->isl_dev, depth_mt->bo); - view.base_level = depth_irb->mt_level - depth_irb->mt->first_level; - view.base_array_layer = depth_irb->mt_layer; - view.array_len = MAX2(depth_irb->layer_count, 1); - view.format = depth_mt->surf.format; - - info.hiz_usage = depth_mt->aux_usage; - if (!brw_renderbuffer_has_hiz(depth_irb)) { - /* Just because a miptree has ISL_AUX_USAGE_HIZ does not mean that - * all miplevels of that miptree are guaranteed to support HiZ. See - * brw_miptree_level_enable_hiz for details. 
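- * (HiZ support is tracked per level in mt->level[].has_hiz, which is
- * what brw_renderbuffer_has_hiz() consults for the bound level, so
- * mt->aux_usage alone is not a sufficient check here.)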
- */ - info.hiz_usage = ISL_AUX_USAGE_NONE; - } - - if (info.hiz_usage == ISL_AUX_USAGE_HIZ) { - info.hiz_surf = &depth_mt->aux_buf->surf; - - uint64_t hiz_offset = 0; - if (devinfo->ver == 6) { - /* HiZ surfaces on Sandy Bridge technically don't support - * mip-mapping. However, we can fake it by offsetting to the - * first slice of LOD0 in the HiZ surface. - */ - isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf, - view.base_level, 0, 0, - &hiz_offset, NULL, NULL); - } - - info.hiz_address = - brw_batch_reloc(&brw->batch, - ds_offset + brw->isl_dev.ds.hiz_offset, - depth_mt->aux_buf->bo, - depth_mt->aux_buf->offset + hiz_offset, - RELOC_WRITE); - } - - info.depth_clear_value = depth_mt->fast_clear_color.f32[0]; - } - - if (stencil_mt) { - view.usage |= ISL_SURF_USAGE_STENCIL_BIT; - info.stencil_surf = &stencil_mt->surf; - - if (!depth_mt) { - info.mocs = brw_mocs(&brw->isl_dev, stencil_mt->bo); - view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level; - view.base_array_layer = stencil_irb->mt_layer; - view.array_len = MAX2(stencil_irb->layer_count, 1); - view.format = stencil_mt->surf.format; - } - - uint64_t stencil_offset = 0; - if (devinfo->ver == 6) { - /* Stencil surfaces on Sandy Bridge technically don't support - * mip-mapping. However, we can fake it by offsetting to the - * first slice of LOD0 in the stencil surface. - */ - isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf, - view.base_level, 0, 0, - &stencil_offset, NULL, NULL); - } - - info.stencil_address = - brw_batch_reloc(&brw->batch, - ds_offset + brw->isl_dev.ds.stencil_offset, - stencil_mt->bo, - stencil_mt->offset + stencil_offset, - RELOC_WRITE); - } - - isl_emit_depth_stencil_hiz_s(&brw->isl_dev, ds_map, &info); - - brw->batch.map_next += ds_dwords; - brw_batch_advance(brw); - - brw->no_depth_or_stencil = !depth_mt && !stencil_mt; -} - -const struct brw_tracked_state brw_depthbuffer = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_AUX_STATE | - BRW_NEW_BATCH | - BRW_NEW_BLORP, - }, - .emit = brw_emit_depthbuffer, -}; - -void -brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool is_965 = devinfo->verx10 == 40; - const uint32_t _3DSTATE_PIPELINE_SELECT = - is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45; - - if (devinfo->ver >= 8 && devinfo->ver < 10) { - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gfx9 - * hardware too. - */ - if (pipeline == BRW_COMPUTE_PIPELINE) { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - - brw->ctx.NewDriverState |= BRW_NEW_CC_STATE; - } - } - - if (devinfo->ver == 9 && pipeline == BRW_RENDER_PIPELINE) { - /* We seem to have issues with geometry flickering when 3D and compute - * are combined in the same batch and this appears to fix it. 
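- * (The packet below is a MEDIA_VFE_STATE programming the maximum thread
- * count and a minimal URB configuration before the 3D pipeline is
- * selected further down.)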
- */ - const uint32_t maxNumberofThreads = - devinfo->max_cs_threads * devinfo->subslice_total - 1; - - BEGIN_BATCH(9); - OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(2 << 8 | maxNumberofThreads << 16); - OUT_BATCH(0); - OUT_BATCH(2 << 16); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - if (devinfo->ver >= 6) { - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: DEVSNB+ - * - * Software must ensure all the write caches are flushed through a - * stalling PIPE_CONTROL command followed by another PIPE_CONTROL - * command to invalidate read only caches prior to programming - * MI_PIPELINE_SELECT command to change the Pipeline Select Mode. - */ - const unsigned dc_flush = - devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0; - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - dc_flush | - PIPE_CONTROL_CS_STALL); - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_INSTRUCTION_INVALIDATE); - - } else { - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: PRE-DEVSNB - * - * Software must ensure the current pipeline is flushed via an - * MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT. - */ - BEGIN_BATCH(1); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - } - - /* Select the pipeline */ - BEGIN_BATCH(1); - OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 | - (devinfo->ver >= 9 ? (3 << 8) : 0) | - (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0)); - ADVANCE_BATCH(); - - if (devinfo->verx10 == 70 && - pipeline == BRW_RENDER_PIPELINE) { - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: DEVIVB, DEVHSW:GT3:A0 - * - * Software must send a pipe_control with a CS stall and a post sync - * operation and then a dummy DRAW after every MI_SET_CONTEXT and - * after any PIPELINE_SELECT that is enabling 3D mode. - */ - gfx7_emit_cs_stall_flush(brw); - - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(_3DPRIM_POINTLIST); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - if (devinfo->platform == INTEL_PLATFORM_GLK) { - /* Project: DevGLK - * - * "This chicken bit works around a hardware issue with barrier logic - * encountered when switching between GPGPU and 3D pipelines. To - * workaround the issue, this mode bit should be set after a pipeline - * is selected." - */ - const unsigned barrier_mode = - pipeline == BRW_RENDER_PIPELINE ? GLK_SCEC_BARRIER_MODE_3D_HULL - : GLK_SCEC_BARRIER_MODE_GPGPU; - brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1, - barrier_mode | GLK_SCEC_BARRIER_MODE_MASK); - } -} - -/** - * Update the pixel hashing modes that determine the balancing of PS threads - * across subslices and slices. - * - * \param width Width bound of the rendering area (already scaled down if \p - * scale is greater than 1). - * \param height Height bound of the rendering area (already scaled down if \p - * scale is greater than 1). - * \param scale The number of framebuffer samples that could potentially be - * affected by an individual channel of the PS thread. 
This is - * typically one for single-sampled rendering, but for operations - * like CCS resolves and fast clears a single PS invocation may - * update a huge number of pixels, in which case a finer - * balancing is desirable in order to maximally utilize the - * bandwidth available. UINT_MAX can be used as shorthand for - * "finest hashing mode available". - */ -void -brw_emit_hashing_mode(struct brw_context *brw, unsigned width, - unsigned height, unsigned scale) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver == 9) { - const uint32_t slice_hashing[] = { - /* Because all Gfx9 platforms with more than one slice require - * three-way subslice hashing, a single "normal" 16x16 slice hashing - * block is guaranteed to suffer from substantial imbalance, with one - * subslice receiving twice as much work as the other two in the - * slice. - * - * The performance impact of that would be particularly severe when - * three-way hashing is also in use for slice balancing (which is the - * case for all Gfx9 GT4 platforms), because one of the slices - * receives one every three 16x16 blocks in either direction, which - * is roughly the periodicity of the underlying subslice imbalance - * pattern ("roughly" because in reality the hardware's - * implementation of three-way hashing doesn't do exact modulo 3 - * arithmetic, which somewhat decreases the magnitude of this effect - * in practice). This leads to a systematic subslice imbalance - * within that slice regardless of the size of the primitive. The - * 32x32 hashing mode guarantees that the subslice imbalance within a - * single slice hashing block is minimal, largely eliminating this - * effect. - */ - GFX9_SLICE_HASHING_32x32, - /* Finest slice hashing mode available. */ - GFX9_SLICE_HASHING_NORMAL - }; - const uint32_t subslice_hashing[] = { - /* The 16x16 subslice hashing mode is used on non-LLC platforms to - * match the performance of previous Mesa versions. 16x16 has a - * slight cache locality benefit especially visible in the sampler L1 - * cache efficiency of low-bandwidth platforms, but it comes at the - * cost of greater subslice imbalance for primitives of dimensions - * approximately intermediate between 16x4 and 16x16. - */ - (devinfo->has_llc ? GFX9_SUBSLICE_HASHING_16x4 : - GFX9_SUBSLICE_HASHING_16x16), - /* Finest subslice hashing mode available. */ - GFX9_SUBSLICE_HASHING_8x4 - }; - /* Dimensions of the smallest hashing block of a given hashing mode. If - * the rendering area is smaller than this there can't possibly be any - * benefit from switching to this mode, so we optimize out the - * transition. - */ - const unsigned min_size[][2] = { - { 16, 4 }, - { 8, 4 } - }; - const unsigned idx = scale > 1; - - if (width > min_size[idx][0] || height > min_size[idx][1]) { - const uint32_t gt_mode = - (devinfo->num_slices == 1 ? 
0 : - GFX9_SLICE_HASHING_MASK_BITS | slice_hashing[idx]) | - GFX9_SUBSLICE_HASHING_MASK_BITS | subslice_hashing[idx]; - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_CS_STALL); - - brw_load_register_imm32(brw, GFX7_GT_MODE, gt_mode); - - brw->current_hash_scale = scale; - } - } -} - -/** - * Misc invariant state packets - */ -void -brw_upload_invariant_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool is_965 = devinfo->verx10 == 40; - - brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE); - brw->last_pipeline = BRW_RENDER_PIPELINE; - - if (devinfo->ver >= 8) { - BEGIN_BATCH(3); - OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(2); - OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Original Gfx4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */ - if (!is_965) { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); - /* use legacy aa line coverage computation */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_multisample_state.h b/src/mesa/drivers/dri/i965/brw_multisample_state.h deleted file mode 100644 index 2142a17..0000000 --- a/src/mesa/drivers/dri/i965/brw_multisample_state.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_MULTISAMPLE_STATE_H -#define BRW_MULTISAMPLE_STATE_H - -#include - -/** - * Note: There are no standard multisample positions defined in OpenGL - * specifications. Implementations have the freedom to pick the positions - * which give plausible results. But the Vulkan specification does define - * standard sample positions. So, we decided to pick the same pattern in - * OpenGL as in Vulkan to keep it uniform across drivers and also to avoid - * breaking applications which rely on this standard pattern. - */ - -/** - * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8). 
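- *
- * (Each coordinate is a 4-bit fixed-point value in units of 1/16 pixel,
- * so 0.5 -> 0x8 and 0.75 -> 0xc; the X/Y positions of all samples are
- * packed together into the dwords below.)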
- * - * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25): - * 4 c - * 4 1 - * c 0 - */ -static const uint32_t -brw_multisample_positions_1x_2x = 0x008844cc; - -/** - * Sample positions: - * 2 6 a e - * 2 0 - * 6 1 - * a 2 - * e 3 - */ -static const uint32_t -brw_multisample_positions_4x = 0xae2ae662; - -/** - * Sample positions: - * - * From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE: - * Programming Notes): - * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and - * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7 - * for 8X) must have monotonically increasing distance from the - * pixel center. This is required to get the correct centroid - * computation in the device." - * - * Sample positions: - * 1 3 5 7 9 b d f - * 1 7 - * 3 3 - * 5 0 - * 7 5 - * 9 2 - * b 1 - * d 4 - * f 6 - */ -static const uint32_t -brw_multisample_positions_8x[] = { 0x53d97b95, 0xf1bf173d }; - -/** - * Sample positions: - * - * 0 1 2 3 4 5 6 7 8 9 a b c d e f - * 0 15 - * 1 9 - * 2 10 - * 3 7 - * 4 13 - * 5 1 - * 6 4 - * 7 3 - * 8 12 - * 9 0 - * a 2 - * b 6 - * c 11 - * d 5 - * e 8 - * f 14 - */ -static const uint32_t -brw_multisample_positions_16x[] = { - 0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408 -}; - -#endif /* BRW_MULTISAMPLE_STATE_H */ diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp deleted file mode 100644 index 8ef67ff..0000000 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/brw_nir.h" -#include "compiler/glsl/ir_uniform.h" -#include "compiler/nir/nir_builder.h" -#include "brw_program.h" - -static void -brw_nir_setup_glsl_builtin_uniform(nir_variable *var, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - bool is_scalar) -{ - const nir_state_slot *const slots = var->state_slots; - assert(var->state_slots != NULL); - - unsigned uniform_index = var->data.driver_location / 4; - for (unsigned int i = 0; i < var->num_state_slots; i++) { - /* This state reference has already been setup by ir_to_mesa, but we'll - * get the same index back here. - */ - int index = _mesa_add_state_reference(prog->Parameters, - slots[i].tokens); - - /* Add each of the unique swizzles of the element as a parameter. 
- * This'll end up matching the expected layout of the - * array/matrix/structure we're trying to fill in. - */ - int last_swiz = -1; - for (unsigned j = 0; j < 4; j++) { - int swiz = GET_SWZ(slots[i].swizzle, j); - - /* If we hit a pair of identical swizzles, this means we've hit the - * end of the builtin variable. In scalar mode, we should just quit - * and move on to the next one. In vec4, we need to continue and pad - * it out to 4 components. - */ - if (swiz == last_swiz && is_scalar) - break; - - last_swiz = swiz; - - stage_prog_data->param[uniform_index++] = - BRW_PARAM_PARAMETER(index, swiz); - } - } -} - -static void -setup_vec4_image_param(uint32_t *params, uint32_t idx, - unsigned offset, unsigned n) -{ - assert(offset % sizeof(uint32_t) == 0); - for (unsigned i = 0; i < n; ++i) - params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); - - for (unsigned i = n; i < 4; ++i) - params[i] = BRW_PARAM_BUILTIN_ZERO; -} - -static void -brw_setup_image_uniform_values(nir_variable *var, - struct brw_stage_prog_data *prog_data) -{ - unsigned param_start_index = var->data.driver_location / 4; - uint32_t *param = &prog_data->param[param_start_index]; - unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); - - for (unsigned i = 0; i < num_images; i++) { - const unsigned image_idx = var->data.binding + i; - - /* Upload the brw_image_param structure. The order is expected to match - * the BRW_IMAGE_PARAM_*_OFFSET defines. - */ - setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - image_idx, - offsetof(brw_image_param, offset), 2); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - image_idx, - offsetof(brw_image_param, size), 3); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - image_idx, - offsetof(brw_image_param, stride), 4); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET, - image_idx, - offsetof(brw_image_param, tiling), 3); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - image_idx, - offsetof(brw_image_param, swizzling), 2); - param += BRW_IMAGE_PARAM_SIZE; - } -} - -static unsigned -count_uniform_storage_slots(const struct glsl_type *type) -{ - /* gl_uniform_storage can cope with one level of array, so if the - * type is a composite type or an array where each element occupies - * more than one slot than we need to recursively process it. - */ - if (glsl_type_is_struct_or_ifc(type)) { - unsigned location_count = 0; - - for (unsigned i = 0; i < glsl_get_length(type); i++) { - const struct glsl_type *field_type = glsl_get_struct_field(type, i); - - location_count += count_uniform_storage_slots(field_type); - } - - return location_count; - } - - if (glsl_type_is_array(type)) { - const struct glsl_type *element_type = glsl_get_array_element(type); - - if (glsl_type_is_array(element_type) || - glsl_type_is_struct_or_ifc(element_type)) { - unsigned element_count = count_uniform_storage_slots(element_type); - return element_count * glsl_get_length(type); - } - } - - return 1; -} - -static void -brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - bool is_scalar) -{ - if (var->type->without_array()->is_sampler() || - var->type->without_array()->is_image()) - return; - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_storage structs for each subcomponent that - * glGetUniformLocation() could name. 
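- * (For example, a hypothetical "uniform mat2 m;" occupies one storage
- * slot with vector_count == 2 and vector_size == 2, contributing four
- * BRW_PARAM_UNIFORM entries in scalar mode and two zero-padded vec4s in
- * vec4 mode.)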
We know it's been set up in the same - * order we'd walk the type, so walk the list of storage that matches the - * range of slots covered by this variable. - */ - unsigned uniform_index = var->data.driver_location / 4; - unsigned num_slots = count_uniform_storage_slots(var->type); - for (unsigned u = 0; u < num_slots; u++) { - struct gl_uniform_storage *storage = - &prog->sh.data->UniformStorage[var->data.location + u]; - - /* We already handled samplers and images via the separate top-level - * variables created by gl_nir_lower_samplers_as_deref(), but they're - * still part of the structure's storage, and so we'll see them while - * walking it to set up the other regular fields. Just skip over them. - */ - if (storage->builtin || - storage->type->is_sampler() || - storage->type->is_image()) - continue; - - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - unsigned vector_size = storage->type->vector_elements; - unsigned max_vector_size = 4; - if (storage->type->base_type == GLSL_TYPE_DOUBLE || - storage->type->base_type == GLSL_TYPE_UINT64 || - storage->type->base_type == GLSL_TYPE_INT64) { - vector_size *= 2; - if (vector_size > 4) - max_vector_size = 8; - } - - for (unsigned s = 0; s < vector_count; s++) { - unsigned i; - for (i = 0; i < vector_size; i++) { - uint32_t idx = components - prog->sh.data->UniformDataSlots; - stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx); - components++; - } - - if (!is_scalar) { - /* Pad out with zeros if needed (only needed for vec4) */ - for (; i < max_vector_size; i++) { - stage_prog_data->param[uniform_index++] = - BRW_PARAM_BUILTIN_ZERO; - } - } - } - } -} - -void -brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - bool is_scalar) -{ - unsigned nr_params = shader->num_uniforms / 4; - stage_prog_data->nr_params = nr_params; - stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); - - nir_foreach_uniform_variable(var, shader) { - /* UBO's, atomics and samplers don't take up space in the - uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (var->num_state_slots > 0) { - brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data, - is_scalar); - } else { - brw_nir_setup_glsl_uniform(shader->info.stage, var, prog, - stage_prog_data, is_scalar); - } - } - - nir_foreach_image_variable(var, shader) - brw_setup_image_uniform_values(var, stage_prog_data); -} - -void -brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data) -{ - struct gl_program_parameter_list *plist = prog->Parameters; - - unsigned nr_params = plist->NumParameters * 4; - stage_prog_data->nr_params = nr_params; - stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); - - /* For ARB programs, prog_to_nir generates a single "parameters" variable - * for all uniform data. There may be additional sampler variables, and - * an extra uniform from nir_lower_wpos_ytransform. - */ - - for (unsigned p = 0; p < plist->NumParameters; p++) { - /* Parameters should be either vec4 uniforms or single component - * constants; matrices and other larger types should have been broken - * down earlier. 
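- * (A reference such as "state.matrix.mvp", for instance, reaches this
- * loop as four separate vec4 row parameters rather than one matrix
- * entry.)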
- */ - assert(plist->Parameters[p].Size <= 4); - - unsigned i; - for (i = 0; i < plist->Parameters[p].Size; i++) - stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i); - for (; i < 4; i++) - stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; - } -} - -static nir_ssa_def * -get_aoa_deref_offset(nir_builder *b, - nir_deref_instr *deref, - unsigned elem_size) -{ - unsigned array_size = elem_size; - nir_ssa_def *offset = nir_imm_int(b, 0); - - while (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - - /* This level's element size is the previous level's array size */ - nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); - assert(deref->arr.index.ssa); - offset = nir_iadd(b, offset, - nir_imul(b, index, nir_imm_int(b, array_size))); - - deref = nir_deref_instr_parent(deref); - assert(glsl_type_is_array(deref->type)); - array_size *= glsl_get_length(deref->type); - } - - /* Accessing an invalid surface index with the dataport can result in a - * hang. According to the spec "if the index used to select an individual - * element is negative or greater than or equal to the size of the array, - * the results of the operation are undefined but may not lead to - * termination" -- which is one of the possible outcomes of the hang. - * Clamp the index to prevent access outside of the array bounds. - */ - return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); -} - -void -brw_nir_lower_gl_images(nir_shader *shader, - const struct gl_program *prog) -{ - /* We put image uniforms at the end */ - nir_foreach_image_variable(var, shader) { - const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); - - var->data.driver_location = shader->num_uniforms; - shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4; - } - - nir_function_impl *impl = nir_shader_get_entrypoint(shader); - - nir_builder b; - nir_builder_init(&b, impl); - - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_image_deref_load: - case nir_intrinsic_image_deref_store: - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - case nir_intrinsic_image_deref_size: - case nir_intrinsic_image_deref_samples: - case nir_intrinsic_image_deref_load_raw_intel: - case nir_intrinsic_image_deref_store_raw_intel: { - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - nir_variable *var = nir_deref_instr_get_variable(deref); - - struct gl_uniform_storage *storage = - &prog->sh.data->UniformStorage[var->data.location]; - const unsigned image_var_idx = - storage->opaque[shader->info.stage].index; - - b.cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx), - get_aoa_deref_offset(&b, deref, 1)); - nir_rewrite_image_intrinsic(intrin, index, false); - break; - } - - case nir_intrinsic_image_deref_load_param_intel: { - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - nir_variable *var 
= nir_deref_instr_get_variable(deref); - const unsigned num_images = - MAX2(1, var->type->arrays_of_arrays_size()); - - b.cursor = nir_instr_remove(&intrin->instr); - - const unsigned param = nir_intrinsic_base(intrin); - nir_ssa_def *offset = - get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4); - offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16)); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b.shader, - nir_intrinsic_load_uniform); - nir_intrinsic_set_base(load, var->data.driver_location); - nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4); - load->src[0] = nir_src_for_ssa(offset); - load->num_components = intrin->dest.ssa.num_components; - nir_ssa_dest_init(&load->instr, &load->dest, - intrin->dest.ssa.num_components, - intrin->dest.ssa.bit_size, NULL); - nir_builder_instr_insert(&b, &load->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - &load->dest.ssa); - break; - } - - default: - break; - } - } - } -} - -void -brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts, - struct brw_stage_prog_data *prog_data) -{ - if (nr_userclip_plane_consts == 0) - return; - - nir_function_impl *impl = nir_shader_get_entrypoint(nir); - - nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false, - NULL); - nir_lower_io_to_temporaries(nir, impl, true, false); - nir_lower_global_vars_to_local(nir); - nir_lower_vars_to_ssa(nir); - - const unsigned clip_plane_base = nir->num_uniforms; - - assert(nir->num_uniforms == prog_data->nr_params * 4); - const unsigned num_clip_floats = 4 * nr_userclip_plane_consts; - uint32_t *clip_param = - brw_stage_prog_data_add_params(prog_data, num_clip_floats); - nir->num_uniforms += num_clip_floats * sizeof(float); - assert(nir->num_uniforms == prog_data->nr_params * 4); - - for (unsigned i = 0; i < num_clip_floats; i++) - clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4); - - nir_builder b; - nir_builder_init(&b, impl); - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane) - continue; - - b.cursor = nir_before_instr(instr); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); - load->num_components = 4; - load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); - nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) * - nir_intrinsic_ucp_id(intrin)); - nir_intrinsic_set_range(load, 4 * sizeof(float)); - nir_builder_instr_insert(&b, &load->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - &load->dest.ssa); - nir_instr_remove(instr); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_object_purgeable.c b/src/mesa/drivers/dri/i965/brw_object_purgeable.c deleted file mode 100644 index 104454e..0000000 --- a/src/mesa/drivers/dri/i965/brw_object_purgeable.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject 
to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file brw_object_purgeable.c - * - * The driver implementation of the GL_APPLE_object_purgeable extension. - */ - -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" - -#include "brw_context.h" -#include "brw_buffer_objects.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -static GLenum -brw_buffer_purgeable(struct brw_bo *buffer) -{ - int retained = 0; - - if (buffer != NULL) - retained = brw_bo_madvise(buffer, I915_MADV_DONTNEED); - - return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; -} - -static GLenum -brw_buffer_object_purgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - if (intel_obj->buffer != NULL) - return brw_buffer_purgeable(intel_obj->buffer); - - if (option == GL_RELEASED_APPLE) { - return GL_RELEASED_APPLE; - } else { - /* XXX Create the buffer and madvise(MADV_DONTNEED)? */ - return brw_buffer_purgeable(intel_obj->buffer); - } -} - -static GLenum -brw_texture_object_purgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct brw_texture_object *intel; - - (void) ctx; - (void) option; - - intel = brw_texture_object(obj); - if (intel->mt == NULL || intel->mt->bo == NULL) - return GL_RELEASED_APPLE; - - return brw_buffer_purgeable(intel->mt->bo); -} - -static GLenum -brw_render_object_purgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct brw_renderbuffer *intel; - - (void) ctx; - (void) option; - - intel = brw_renderbuffer(obj); - if (intel->mt == NULL) - return GL_RELEASED_APPLE; - - return brw_buffer_purgeable(intel->mt->bo); -} - -static int -brw_bo_unpurgeable(struct brw_bo *buffer) -{ - int retained; - - retained = 0; - if (buffer != NULL) - retained = brw_bo_madvise(buffer, I915_MADV_WILLNEED); - - return retained; -} - -static GLenum -brw_buffer_object_unpurgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - struct brw_buffer_object *intel = brw_buffer_object(obj); - - (void) ctx; - - if (!intel->buffer) - return GL_UNDEFINED_APPLE; - - if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->buffer)) { - brw_bo_unreference(intel->buffer); - intel->buffer = NULL; - return GL_UNDEFINED_APPLE; - } - - return GL_RETAINED_APPLE; -} - -static GLenum -brw_texture_object_unpurgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct brw_texture_object *intel; - - (void) ctx; - - intel = brw_texture_object(obj); - if (intel->mt == NULL || intel->mt->bo == NULL) - return GL_UNDEFINED_APPLE; - - if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) { - brw_miptree_release(&intel->mt); - return GL_UNDEFINED_APPLE; - } - - return GL_RETAINED_APPLE; -} - -static GLenum 
-brw_render_object_unpurgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct brw_renderbuffer *intel; - - (void) ctx; - - intel = brw_renderbuffer(obj); - if (intel->mt == NULL) - return GL_UNDEFINED_APPLE; - - if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) { - brw_miptree_release(&intel->mt); - return GL_UNDEFINED_APPLE; - } - - return GL_RETAINED_APPLE; -} - -void -brw_init_object_purgeable_functions(struct dd_function_table *functions) -{ - functions->BufferObjectPurgeable = brw_buffer_object_purgeable; - functions->TextureObjectPurgeable = brw_texture_object_purgeable; - functions->RenderObjectPurgeable = brw_render_object_purgeable; - - functions->BufferObjectUnpurgeable = brw_buffer_object_unpurgeable; - functions->TextureObjectUnpurgeable = brw_texture_object_unpurgeable; - functions->RenderObjectUnpurgeable = brw_render_object_unpurgeable; -} diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c deleted file mode 100644 index 43bd4d6..0000000 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ /dev/null @@ -1,533 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_performance_query.c - * - * Implementation of the GL_INTEL_performance_query extension. - * - * Currently there are two possible counter sources exposed here: - * - * On Gfx6+ hardware we have numerous 64bit Pipeline Statistics Registers - * that we can snapshot at the beginning and end of a query. - * - * On Gfx7.5+ we have Observability Architecture counters which are - * covered in a separate document from the rest of the PRMs. 
It is available at: - * https://01.org/linuxgraphics/documentation/driver-documentation-prms - * => 2013 Intel Core Processor Family => Observability Performance Counters - * (This one volume covers Sandybridge, Ivybridge, Baytrail, and Haswell, - * though notably we currently only support OA counters for Haswell+) - */ - -#include <limits.h> - -/* put before sys/types.h to silence glibc warnings */ -#ifdef MAJOR_IN_MKDEV -#include <sys/mkdev.h> -#endif -#ifdef MAJOR_IN_SYSMACROS -#include <sys/sysmacros.h> -#endif -#include <sys/stat.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> - -#include <xf86drm.h> -#include "drm-uapi/i915_drm.h" - -#include "main/hash.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/performance_query.h" - -#include "util/bitset.h" -#include "util/ralloc.h" -#include "util/hash_table.h" -#include "util/list.h" -#include "util/u_math.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" - -#include "perf/intel_perf.h" -#include "perf/intel_perf_regs.h" -#include "perf/intel_perf_mdapi.h" -#include "perf/intel_perf_query.h" - -#define FILE_DEBUG_FLAG DEBUG_PERFMON - -#define OAREPORT_REASON_MASK 0x3f -#define OAREPORT_REASON_SHIFT 19 -#define OAREPORT_REASON_TIMER (1<<0) -#define OAREPORT_REASON_TRIGGER1 (1<<1) -#define OAREPORT_REASON_TRIGGER2 (1<<2) -#define OAREPORT_REASON_CTX_SWITCH (1<<3) -#define OAREPORT_REASON_GO_TRANSITION (1<<4) - -struct brw_perf_query_object { - struct gl_perf_query_object base; - struct intel_perf_query_object *query; -}; - -/** Downcasting convenience macro. */ -static inline struct brw_perf_query_object * -brw_perf_query(struct gl_perf_query_object *o) -{ - return (struct brw_perf_query_object *) o; -} - -#define MI_RPC_BO_SIZE 4096 -#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) -#define MI_FREQ_START_OFFSET_BYTES (3072) -#define MI_FREQ_END_OFFSET_BYTES (3076) - -/******************************************************************************/ - -static bool -brw_is_perf_query_ready(struct gl_context *ctx, - struct gl_perf_query_object *o); - -static void -dump_perf_query_callback(void *query_void, void *brw_void) -{ - struct brw_context *ctx = brw_void; - struct intel_perf_context *perf_ctx = ctx->perf_ctx; - struct gl_perf_query_object *o = query_void; - struct brw_perf_query_object * brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - DBG("%4d: %-6s %-8s ", - o->Id, - o->Used ? "Dirty," : "New,", - o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,")); - intel_perf_dump_query(perf_ctx, obj, &ctx->batch); -} - -static void -dump_perf_queries(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - intel_perf_dump_query_count(brw->perf_ctx); - _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw); -} - -/** - * Driver hook for glGetPerfQueryInfoINTEL(). 
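 *
 * (Editor's sketch, not part of the original source: on the API side an
 * application enumerates queries by walking the id chain, e.g.
 *
 *    GLuint qid;
 *    glGetFirstPerfQueryIdINTEL(&qid);
 *    while (qid != 0) {
 *       GLchar name[256];
 *       GLuint data_size, n_counters, n_instances, caps_mask;
 *       glGetPerfQueryInfoINTEL(qid, sizeof(name), name, &data_size,
 *                               &n_counters, &n_instances, &caps_mask);
 *       glGetNextPerfQueryIdINTEL(qid, &qid);
 *    }
 *
 * Core Mesa resolves the id to the query_index passed to this hook.)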
- */ -static void -brw_get_perf_query_info(struct gl_context *ctx, - unsigned query_index, - const char **name, - GLuint *data_size, - GLuint *n_counters, - GLuint *n_active) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_perf_context *perf_ctx = brw->perf_ctx; - struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx); - const struct intel_perf_query_info *query = &perf_cfg->queries[query_index]; - - *name = query->name; - *data_size = query->data_size; - *n_counters = query->n_counters; - *n_active = intel_perf_active_queries(perf_ctx, query); -} - -static GLuint -intel_counter_type_enum_to_gl_type(enum intel_perf_counter_type type) -{ - switch (type) { - case INTEL_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL; - case INTEL_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL; - case INTEL_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL; - case INTEL_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL; - case INTEL_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL; - case INTEL_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL; - default: - unreachable("Unknown counter type"); - } -} - -static GLuint -intel_counter_data_type_to_gl_type(enum intel_perf_counter_data_type type) -{ - switch (type) { - case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: return GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL; - default: - unreachable("Unknown counter data type"); - } -} - -/** - * Driver hook for glGetPerfCounterInfoINTEL(). - */ -static void -brw_get_perf_counter_info(struct gl_context *ctx, - unsigned query_index, - unsigned counter_index, - const char **name, - const char **desc, - GLuint *offset, - GLuint *data_size, - GLuint *type_enum, - GLuint *data_type_enum, - GLuint64 *raw_max) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_perf_config *perf_cfg = intel_perf_config(brw->perf_ctx); - const struct intel_perf_query_info *query = - &perf_cfg->queries[query_index]; - const struct intel_perf_query_counter *counter = - &query->counters[counter_index]; - - *name = counter->name; - *desc = counter->desc; - *offset = counter->offset; - *data_size = intel_perf_query_counter_get_size(counter); - *type_enum = intel_counter_type_enum_to_gl_type(counter->type); - *data_type_enum = intel_counter_data_type_to_gl_type(counter->data_type); - *raw_max = counter->raw_max; -} - -enum OaReadStatus { - OA_READ_STATUS_ERROR, - OA_READ_STATUS_UNFINISHED, - OA_READ_STATUS_FINISHED, -}; - -/******************************************************************************/ - -/** - * Driver hook for glBeginPerfQueryINTEL(). - */ -static bool -brw_begin_perf_query(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - struct intel_perf_context *perf_ctx = brw->perf_ctx; - - /* We can assume the frontend hides mistaken attempts to Begin a - * query object multiple times before its End. 
Similarly if an - * application reuses a query object before results have arrived - * the frontend will wait for prior results so we don't need - * to support abandoning in-flight results. - */ - assert(!o->Active); - assert(!o->Used || o->Ready); /* no in-flight query to worry about */ - - DBG("Begin(%d)\n", o->Id); - - bool ret = intel_perf_begin_query(perf_ctx, obj); - - if (INTEL_DEBUG(DEBUG_PERFMON)) - dump_perf_queries(brw); - - return ret; -} - -/** - * Driver hook for glEndPerfQueryINTEL(). - */ -static void -brw_end_perf_query(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - struct intel_perf_context *perf_ctx = brw->perf_ctx; - - DBG("End(%d)\n", o->Id); - intel_perf_end_query(perf_ctx, obj); -} - -static void -brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - assert(!o->Ready); - - intel_perf_wait_query(brw->perf_ctx, obj, &brw->batch); -} - -static bool -brw_is_perf_query_ready(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - if (o->Ready) - return true; - - return intel_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch); -} - -/** - * Driver hook for glGetPerfQueryDataINTEL(). - */ -static bool -brw_get_perf_query_data(struct gl_context *ctx, - struct gl_perf_query_object *o, - GLsizei data_size, - GLuint *data, - GLuint *bytes_written) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - assert(brw_is_perf_query_ready(ctx, o)); - - DBG("GetData(%d)\n", o->Id); - - if (INTEL_DEBUG(DEBUG_PERFMON)) - dump_perf_queries(brw); - - /* We expect that the frontend only calls this hook when it knows - * that results are available. - */ - assert(o->Ready); - - intel_perf_get_query_data(brw->perf_ctx, obj, &brw->batch, - data_size, data, bytes_written); - - return true; -} - -static struct gl_perf_query_object * -brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_perf_context *perf_ctx = brw->perf_ctx; - struct intel_perf_query_object * obj = intel_perf_new_query(perf_ctx, query_index); - if (unlikely(!obj)) - return NULL; - - struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object)); - if (unlikely(!brw_query)) { - intel_perf_delete_query(perf_ctx, obj); - return NULL; - } - - brw_query->query = obj; - return &brw_query->base; -} - -/** - * Driver hook for glDeletePerfQueryINTEL(). - */ -static void -brw_delete_perf_query(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - struct intel_perf_context *perf_ctx = brw->perf_ctx; - - /* We can assume that the frontend waits for a query to complete - * before ever calling into here, so we don't have to worry about - * deleting an in-flight query object. 
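 *
 * (Editor's sketch, not part of the original source: the lifecycle the
 * frontend enforces looks roughly like
 *
 *    GLuint handle, written;
 *    GLchar data[4096];   /* >= data_size reported for the query */
 *    glCreatePerfQueryINTEL(query_id, &handle);
 *    glBeginPerfQueryINTEL(handle);
 *    ...draw...
 *    glEndPerfQueryINTEL(handle);
 *    glGetPerfQueryDataINTEL(handle, GL_PERFQUERY_WAIT_INTEL,
 *                            sizeof(data), data, &written);
 *    glDeletePerfQueryINTEL(handle);
 *
 * so by the time this hook runs the query is guaranteed idle.)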
- */ - assert(!o->Active); - assert(!o->Used || o->Ready); - - DBG("Delete(%d)\n", o->Id); - - intel_perf_delete_query(perf_ctx, obj); - free(brw_query); -} - -/******************************************************************************/ -/* intel_device_info will have incorrect default topology values for unsupported - * kernels. Verify kernel support to ensure OA metrics are accurate. - */ -static bool -oa_metrics_kernel_support(int fd, const struct intel_device_info *devinfo) -{ - if (devinfo->ver >= 10) { - /* topology uAPI required for CNL+ (kernel 4.17+) make a call to the api - * to verify support - */ - struct drm_i915_query_item item = { - .query_id = DRM_I915_QUERY_TOPOLOGY_INFO, - }; - struct drm_i915_query query = { - .num_items = 1, - .items_ptr = (uintptr_t) &item, - }; - - /* kernel 4.17+ supports the query */ - return drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query) == 0; - } - - if (devinfo->ver >= 8) { - /* 4.13+ api required for gfx8 - gfx9 */ - int mask; - struct drm_i915_getparam gp = { - .param = I915_PARAM_SLICE_MASK, - .value = &mask, - }; - /* kernel 4.13+ supports this parameter */ - return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0; - } - - if (devinfo->ver == 7) - /* default topology values are correct for HSW */ - return true; - - /* oa not supported before gen 7*/ - return false; -} - -static void * -brw_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size) -{ - return brw_bo_alloc(bufmgr, name, size, BRW_MEMZONE_OTHER); -} - -static void -brw_oa_emit_mi_report_perf_count(void *c, - void *bo, - uint32_t offset_in_bytes, - uint32_t report_id) -{ - struct brw_context *ctx = c; - ctx->vtbl.emit_mi_report_perf_count(ctx, - bo, - offset_in_bytes, - report_id); -} - -typedef void (*bo_unreference_t)(void *); -typedef void *(*bo_map_t)(void *, void *, unsigned flags); -typedef void (*bo_unmap_t)(void *); -typedef void (* emit_mi_report_t)(void *, void *, uint32_t, uint32_t); -typedef void (*emit_mi_flush_t)(void *); - -static void -brw_oa_batchbuffer_flush(void *c, const char *file, int line) -{ - struct brw_context *ctx = c; - _brw_batch_flush_fence(ctx, -1, NULL, file, line); -} - -static void -brw_oa_emit_stall_at_pixel_scoreboard(void *c) -{ - struct brw_context *brw = c; - brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD); -} - -static void -brw_perf_store_register(struct brw_context *brw, struct brw_bo *bo, - uint32_t reg, uint32_t reg_size, - uint32_t offset) -{ - if (reg_size == 8) { - brw_store_register_mem64(brw, bo, reg, offset); - } else { - assert(reg_size == 4); - brw_store_register_mem32(brw, bo, reg, offset); - } -} - -typedef void (*store_register_mem_t)(void *ctx, void *bo, - uint32_t reg, uint32_t reg_size, - uint32_t offset); -typedef bool (*batch_references_t)(void *batch, void *bo); -typedef void (*bo_wait_rendering_t)(void *bo); -typedef int (*bo_busy_t)(void *bo); - -static unsigned -brw_init_perf_query_info(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - struct intel_perf_context *perf_ctx = brw->perf_ctx; - struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx); - - if (perf_cfg) - return perf_cfg->n_queries; - - if (!oa_metrics_kernel_support(brw->screen->fd, devinfo)) - return 0; - - perf_cfg = intel_perf_new(brw->mem_ctx); - - perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc; - perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference; - perf_cfg->vtbl.bo_map = (bo_map_t)brw_bo_map; - perf_cfg->vtbl.bo_unmap 
= (bo_unmap_t)brw_bo_unmap; - perf_cfg->vtbl.emit_stall_at_pixel_scoreboard = - (emit_mi_flush_t)brw_oa_emit_stall_at_pixel_scoreboard; - perf_cfg->vtbl.emit_mi_report_perf_count = - (emit_mi_report_t)brw_oa_emit_mi_report_perf_count; - perf_cfg->vtbl.batchbuffer_flush = brw_oa_batchbuffer_flush; - perf_cfg->vtbl.store_register_mem = - (store_register_mem_t) brw_perf_store_register; - perf_cfg->vtbl.batch_references = (batch_references_t)brw_batch_references; - perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering; - perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy; - - intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd, - true /* pipeline stats */, - true /* register snapshots */); - intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr, - devinfo, brw->hw_ctx, brw->screen->fd); - - return perf_cfg->n_queries; -} - -void -brw_init_performance_queries(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - ctx->Driver.InitPerfQueryInfo = brw_init_perf_query_info; - ctx->Driver.GetPerfQueryInfo = brw_get_perf_query_info; - ctx->Driver.GetPerfCounterInfo = brw_get_perf_counter_info; - ctx->Driver.NewPerfQueryObject = brw_new_perf_query_object; - ctx->Driver.DeletePerfQuery = brw_delete_perf_query; - ctx->Driver.BeginPerfQuery = brw_begin_perf_query; - ctx->Driver.EndPerfQuery = brw_end_perf_query; - ctx->Driver.WaitPerfQuery = brw_wait_perf_query; - ctx->Driver.IsPerfQueryReady = brw_is_perf_query_ready; - ctx->Driver.GetPerfQueryData = brw_get_perf_query_data; -} diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c deleted file mode 100644 index 7c2cfde..0000000 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "brw_fbo.h" - -/** - * Emit a PIPE_CONTROL with various flushing flags. - * - * The caller is responsible for deciding what flags are appropriate for the - * given generation. 
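 *
 * (Editor's sketch, not part of the original source: a typical caller
 * composes generation-appropriate bits from brw_pipe_control.h, e.g.
 *
 *    uint32_t flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
 *    if (devinfo->ver >= 6)
 *       flags |= PIPE_CONTROL_CS_STALL |
 *                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 *    brw_emit_pipe_control_flush(brw, flags);
 *
 * Passing flush and invalidate bits together is safe here: as the code
 * below shows, such combinations are split into two PIPE_CONTROLs on
 * Gfx6+.)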
- */ -void -brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6 && - (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && - (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { - /* A pipe control command with flush and invalidate bits set - * simultaneously is an inherently racy operation on Gfx6+ if the - * contents of the flushed caches were intended to become visible from - * any of the invalidated caches. Split it in two PIPE_CONTROLs, the - * first one should stall the pipeline to make sure that the flushed R/W - * caches are coherent with memory once the specified R/O caches are - * invalidated. On pre-Gfx6 hardware the (implicit) R/O cache - * invalidation seems to happen at the bottom of the pipeline together - * with any write cache flush, so this shouldn't be a concern. In order - * to ensure a full stall, we do an end-of-pipe sync. - */ - brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS)); - flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); - } - - brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0); -} - -/** - * Emit a PIPE_CONTROL that writes to a buffer object. - * - * \p flags should contain one of the following items: - * - PIPE_CONTROL_WRITE_IMMEDIATE - * - PIPE_CONTROL_WRITE_TIMESTAMP - * - PIPE_CONTROL_WRITE_DEPTH_COUNT - */ -void -brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm) -{ - brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm); -} - -/** - * Restriction [DevSNB, DevIVB]: - * - * Prior to changing Depth/Stencil Buffer state (i.e. any combination of - * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER, - * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall - * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth - * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by - * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set), - * unless SW can otherwise guarantee that the pipeline from WM onwards is - * already flushed (e.g., via a preceding MI_FLUSH). - */ -void -brw_emit_depth_stall_flushes(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 6); - - /* Starting on BDW, these pipe controls are unnecessary. - * - * WM HW will internally manage the draining pipe and flushing of the caches - * when this command is issued. The PIPE_CONTROL restrictions are removed. - */ - if (devinfo->ver >= 8) - return; - - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); -} - -/** - * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input): - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, - * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs - * to be sent before any combination of VS associated 3DSTATE." 
- */ -void -gfx7_emit_vs_workaround_flush(struct brw_context *brw) -{ - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver == 7); - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_IMMEDIATE - | PIPE_CONTROL_DEPTH_STALL, - brw->workaround_bo, - brw->workaround_bo_offset, 0); -} - -/** - * From the PRM, Volume 2a: - * - * "Indirect State Pointers Disable - * - * At the completion of the post-sync operation associated with this pipe - * control packet, the indirect state pointers in the hardware are - * considered invalid; the indirect pointers are not saved in the context. - * If any new indirect state commands are executed in the command stream - * while the pipe control is pending, the new indirect state commands are - * preserved. - * - * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context - * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant - * commands are only considered as Indirect State Pointers. Once ISP is - * issued in a context, SW must initialize by programming push constant - * commands for all the shaders (at least to zero length) before attempting - * any rendering operation for the same context." - * - * 3DSTATE_CONSTANT_* packets are restored during a context restore, - * even though they point to a BO that has been already unreferenced at - * the end of the previous batch buffer. This has been fine so far since - * we are protected by the scratch page (every address not covered by - * a BO should be pointing to the scratch page). But on CNL, it is - * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_* - * instruction. - * - * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the - * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a - * context restore, so the mentioned hang doesn't happen. However, - * software must program push constant commands for all stages prior to - * rendering anything, so we flag them as dirty. - * - * Finally, we also make sure to stall at pixel scoreboard to make sure the - * constants have been loaded into the EUs prior to disabling the push constants - * so that it doesn't hang a previous 3DPRIMITIVE. - */ -void -gfx7_emit_isp_disable(struct brw_context *brw) -{ - brw->vtbl.emit_raw_pipe_control(brw, - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_CS_STALL, - NULL, 0, 0); - brw->vtbl.emit_raw_pipe_control(brw, - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | - PIPE_CONTROL_CS_STALL, - NULL, 0, 0); - - brw->vs.base.push_constants_dirty = true; - brw->tcs.base.push_constants_dirty = true; - brw->tes.base.push_constants_dirty = true; - brw->gs.base.push_constants_dirty = true; - brw->wm.base.push_constants_dirty = true; -} - -/** - * Emit a PIPE_CONTROL command for gfx7 with the CS Stall bit set. - */ -void -gfx7_emit_cs_stall_flush(struct brw_context *brw) -{ - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_CS_STALL - | PIPE_CONTROL_WRITE_IMMEDIATE, - brw->workaround_bo, - brw->workaround_bo_offset, 0); -} - -/** - * Emits a PIPE_CONTROL with a non-zero post-sync operation, for - * implementing two workarounds on gfx6. From section 1.4.7.1 - * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: - * - * [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. 
- * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable - * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. - * - * And the workaround for these two requires this workaround first: - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. - * - * And this last workaround is tricky because of the requirements on - * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM - * volume 2 part 1: - * - * "1 of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - Notify Enable ([8] of DW1)" - * - * The cache flushes require the workaround flush that triggered this - * one, so we can't use it. Depth stall would trigger the same. - * Post-sync nonzero is what triggered this second workaround, so we - * can't use that one either. Notify enable is IRQs, which aren't - * really our business. That leaves only stall at scoreboard. - */ -void -brw_emit_post_sync_nonzero_flush(struct brw_context *brw) -{ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - - brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, - brw->workaround_bo, - brw->workaround_bo_offset, 0); -} - -/* - * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization": - * - * Write synchronization is a special case of end-of-pipe - * synchronization that requires that the render cache and/or depth - * related caches are flushed to memory, where the data will become - * globally visible. This type of synchronization is required prior to - * SW (CPU) actually reading the result data from memory, or initiating - * an operation that will use as a read surface (such as a texture - * surface) a previous render target and/or depth/stencil buffer - * - * - * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization": - * - * Exercising the write cache flush bits (Render Target Cache Flush - * Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only - * ensures the write caches are flushed and doesn't guarantee the data - * is globally visible. - * - * SW can track the completion of the end-of-pipe-synchronization by - * using "Notify Enable" and "PostSync Operation - Write Immediate - * Data" in the PIPE_CONTROL command. - */ -void -brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6) { - /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": - * - * "The most common action to perform upon reaching a synchronization - * point is to write a value out to memory. An immediate value - * (included with the synchronization command) may be written." - * - * - * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization": - * - * "In case the data flushed out by the render engine is to be read - * back in to the render engine in coherent manner, then the render - * engine has to wait for the fence completion before accessing the - * flushed data. This can be achieved by following means on various - * products: PIPE_CONTROL command with CS Stall and the required - * write caches flushed with Post-Sync-Operation as Write Immediate - * Data. 
* - * Example: - * - Workload-1 (3D/GPGPU/MEDIA) - * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate - * Data, Required Write Cache Flush bits set) - * - Workload-2 (Can use the data produce or output by Workload-1) - */ - brw_emit_pipe_control_write(brw, - flags | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_WRITE_IMMEDIATE, - brw->workaround_bo, - brw->workaround_bo_offset, 0); - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* Haswell needs additional workarounds: - * - * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization": - * - * Option 1: - * PIPE_CONTROL command with the CS Stall and the required write - * caches flushed with Post-SyncOperation as Write Immediate Data - * followed by eight dummy MI_STORE_DATA_IMM (write to scratch - * space) commands. - * - * Example: - * - Workload-1 - * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write - * Immediate Data, Required Write Cache Flush bits set) - * - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address) - * - Workload-2 (Can use the data produce or output by - * Workload-1) - * - * Unfortunately, both the PRMs and the internal docs are a bit - * out-of-date in this regard. What the Windows driver does (and - * this appears to actually work) is to emit a register read from the - * memory address written by the pipe control above. - * - * What register we load into doesn't matter. We choose an indirect - * rendering register because we know it always exists and it's one - * of the first registers the command parser allows us to write. If - * you don't have command parser support in your kernel (pre-4.2), - * this will get turned into MI_NOOP and you won't get the - * workaround. Unfortunately, there's just not much we can do in - * that case. This register is perfectly safe to write since we - * always re-load all of the indirect draw registers right before - * 3DPRIMITIVE when needed anyway. - */ - brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, - brw->workaround_bo, brw->workaround_bo_offset); - } - } else { - /* On gfx4-5, a regular pipe control seems to suffice. */ - brw_emit_pipe_control_flush(brw, flags); - } -} - -/* Emit a pipelined flush to either flush render and texture cache for - * reading from a FBO-drawn texture, or flush so that frontbuffer - * render appears on the screen in DRI1. - * - * This is also used for the always_flush_cache driconf debug option. 
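 *
 * (Editor's note, not part of the original source: the exact driconf
 * stanza below is illustrative, but enabling the option looks roughly
 * like
 *
 *    <driconf>
 *      <device driver="i965">
 *        <application name="some app" executable="app">
 *          <option name="always_flush_cache" value="true" />
 *        </application>
 *      </device>
 *    </driconf>
 *
 * With it enabled the driver emits this flush after every draw call,
 * trading a large slowdown for easier isolation of caching bugs.)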
- */ -void -brw_emit_mi_flush(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH; - if (devinfo->ver >= 6) { - flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CS_STALL; - } - brw_emit_pipe_control_flush(brw, flags); -} - -static bool -init_identifier_bo(struct brw_context *brw) -{ - void *bo_map; - - if (!can_do_exec_capture(brw->screen)) - return true; - - bo_map = brw_bo_map(NULL, brw->workaround_bo, MAP_READ | MAP_WRITE); - if (!bo_map) - return false; - - brw->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE; - brw->workaround_bo_offset = - ALIGN(intel_debug_write_identifiers(bo_map, 4096, "i965") + 8, 8); - - brw_bo_unmap(brw->workaround_bo); - - return true; -} - -int -brw_init_pipe_control(struct brw_context *brw, - const struct intel_device_info *devinfo) -{ - switch (devinfo->ver) { - case 11: - brw->vtbl.emit_raw_pipe_control = gfx11_emit_raw_pipe_control; - break; - case 9: - brw->vtbl.emit_raw_pipe_control = gfx9_emit_raw_pipe_control; - break; - case 8: - brw->vtbl.emit_raw_pipe_control = gfx8_emit_raw_pipe_control; - break; - case 7: - brw->vtbl.emit_raw_pipe_control = - devinfo->verx10 == 75 ? - gfx75_emit_raw_pipe_control : gfx7_emit_raw_pipe_control; - break; - case 6: - brw->vtbl.emit_raw_pipe_control = gfx6_emit_raw_pipe_control; - break; - case 5: - brw->vtbl.emit_raw_pipe_control = gfx5_emit_raw_pipe_control; - break; - case 4: - brw->vtbl.emit_raw_pipe_control = - devinfo->verx10 == 45 ? - gfx45_emit_raw_pipe_control : gfx4_emit_raw_pipe_control; - break; - default: - unreachable("Unhandled Gen."); - } - - if (devinfo->ver < 6) - return 0; - - /* We can't just use brw_state_batch to get a chunk of space for - * the gfx6 workaround because it involves actually writing to - * the buffer, and the kernel doesn't let us write to the batch. - */ - brw->workaround_bo = brw_bo_alloc(brw->bufmgr, "workaround", 4096, - BRW_MEMZONE_OTHER); - if (brw->workaround_bo == NULL) - return -ENOMEM; - - if (!init_identifier_bo(brw)) - return -ENOMEM; /* Couldn't map workaround_bo?? */ - - brw->workaround_bo_offset = 0; - brw->pipe_controls_since_last_cs_stall = 0; - - return 0; -} - -void -brw_fini_pipe_control(struct brw_context *brw) -{ - brw_bo_unreference(brw->workaround_bo); -} diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h b/src/mesa/drivers/dri/i965/brw_pipe_control.h deleted file mode 100644 index 1aed53e..0000000 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_PIPE_CONTROL_DOT_H -#define BRW_PIPE_CONTROL_DOT_H - -struct brw_context; -struct intel_device_info; -struct brw_bo; - -/** @{ - * - * PIPE_CONTROL operation, a combination of MI_FLUSH and a register write with - * additional flushing control. - * - * The bits here are not the actual hardware values. The actual values - * shift around a bit per-generation, so we just have flags for each - * potential operation, and use genxml to encode the actual packet. - */ -enum pipe_control_flags -{ - PIPE_CONTROL_FLUSH_LLC = (1 << 1), - PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2), - PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3), - PIPE_CONTROL_CS_STALL = (1 << 4), - PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5), - PIPE_CONTROL_SYNC_GFDT = (1 << 6), - PIPE_CONTROL_TLB_INVALIDATE = (1 << 7), - PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8), - PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9), - PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10), - PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11), - PIPE_CONTROL_DEPTH_STALL = (1 << 12), - PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13), - PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14), - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15), - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16), - PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17), - PIPE_CONTROL_FLUSH_ENABLE = (1 << 18), - PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19), - PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20), - PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21), - PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22), - PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23), - PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24), -}; - -#define PIPE_CONTROL_CACHE_FLUSH_BITS \ - (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \ - PIPE_CONTROL_RENDER_TARGET_FLUSH) - -#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \ - (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \ - PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ - PIPE_CONTROL_INSTRUCTION_INVALIDATE) - -/** @} */ - -int brw_init_pipe_control(struct brw_context *brw, - const struct intel_device_info *info); -void brw_fini_pipe_control(struct brw_context *brw); - -void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags); -void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags); -void brw_emit_mi_flush(struct brw_context *brw); -void brw_emit_post_sync_nonzero_flush(struct brw_context *brw); -void brw_emit_depth_stall_flushes(struct brw_context *brw); -void gfx7_emit_vs_workaround_flush(struct brw_context *brw); -void gfx7_emit_cs_stall_flush(struct brw_context *brw); -void gfx7_emit_isp_disable(struct brw_context *brw); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_pixel.c b/src/mesa/drivers/dri/i965/brw_pixel.c deleted file mode 100644 index b6a2c51..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. 
- * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/accum.h" -#include "main/enums.h" -#include "main/state.h" -#include "main/stencil.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "swrast/swrast.h" - -#include "brw_context.h" -#include "brw_pixel.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -static GLenum -effective_func(GLenum func, bool src_alpha_is_one) -{ - if (src_alpha_is_one) { - if (func == GL_SRC_ALPHA) - return GL_ONE; - if (func == GL_ONE_MINUS_SRC_ALPHA) - return GL_ZERO; - } - - return func; -} - -/** - * Check if any fragment operations are in effect which might affect - * glDraw/CopyPixels. - */ -bool -brw_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one) -{ - if (ctx->NewState) - _mesa_update_state(ctx); - - if (_mesa_arb_fragment_program_enabled(ctx)) { - DBG("fallback due to fragment program\n"); - return false; - } - - if (ctx->Color.BlendEnabled && - (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO || - ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD || - effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO || - ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) { - DBG("fallback due to blend\n"); - return false; - } - - if (ctx->Texture._MaxEnabledTexImageUnit != -1) { - DBG("fallback due to texturing\n"); - return false; - } - - if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) { - DBG("fallback due to color masking\n"); - return false; - } - - if (ctx->Color.AlphaEnabled) { - DBG("fallback due to alpha\n"); - return false; - } - - if (ctx->Depth.Test) { - DBG("fallback due to depth test\n"); - return false; - } - - if (ctx->Fog.Enabled) { - DBG("fallback due to fog\n"); - return false; - } - - if (ctx->_ImageTransferState) { - DBG("fallback due to image transfer\n"); - return false; - } - - if (_mesa_stencil_is_enabled(ctx)) { - DBG("fallback due to image stencil\n"); - return false; - } - - if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { - DBG("fallback due to pixel zoom\n"); - return false; - } - - if (ctx->RenderMode != GL_RENDER) { - DBG("fallback due to render mode\n"); - return false; - } - - return true; -} - -void -brw_init_pixel_functions(struct dd_function_table *functions) -{ - functions->Bitmap = brw_bitmap; - 
functions->CopyPixels = brw_copypixels; - functions->DrawPixels = brw_drawpixels; - functions->ReadPixels = brw_readpixels; -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel.h b/src/mesa/drivers/dri/i965/brw_pixel.h deleted file mode 100644 index b6e3e6e..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_PIXEL_H -#define BRW_PIXEL_H - -#include "main/mtypes.h" - -void brw_init_pixel_functions(struct dd_function_table *functions); -bool brw_check_blit_fragment_ops(struct gl_context *ctx, - bool src_alpha_is_one); - -void brw_readpixels(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels); - -void brw_drawpixels(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, - GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels); - -void brw_copypixels(struct gl_context *ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type); - -void brw_bitmap(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *pixels); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_pixel_bitmap.c b/src/mesa/drivers/dri/i965/brw_pixel_bitmap.c deleted file mode 100644 index aa8c2fc..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_bitmap.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/blend.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/colormac.h" -#include "main/condrender.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/pbo.h" -#include "main/bufferobj.h" -#include "main/state.h" -#include "main/texobj.h" -#include "main/context.h" -#include "main/fbobject.h" -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_screen.h" -#include "brw_batch.h" -#include "brw_blit.h" -#include "brw_fbo.h" -#include "brw_image.h" -#include "brw_buffers.h" -#include "brw_pixel.h" - - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - - -/* Unlike the other intel_pixel_* functions, the expectation here is - * that the incoming data is not in a PBO. With the XY_TEXT blit - * method, there's no benefit having it in a PBO, but we could - * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit - * PBO bitmaps. I think they are probably pretty rare though - I - * wonder if Xgl uses them? - */ -static const GLubyte * -map_pbo(struct gl_context *ctx, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap) -{ - GLubyte *buf; - - if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, - GL_COLOR_INDEX, GL_BITMAP, - INT_MAX, (const GLvoid *) bitmap)) { - _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); - return NULL; - } - - buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, - GL_MAP_READ_BIT, - unpack->BufferObj, - MAP_INTERNAL); - if (!buf) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); - return NULL; - } - - return ADD_POINTERS(buf, bitmap); -} - -static bool test_bit( const GLubyte *src, GLuint bit ) -{ - return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; -} - -static void set_bit( GLubyte *dest, GLuint bit ) -{ - dest[bit/8] |= 1 << (bit % 8); -} - -/* Extract a rectangle's worth of data from the bitmap. Called - * per chunk of HW-sized bitmap. - */ -static GLuint -get_bitmap_rect(GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap, - GLuint x, GLuint y, - GLuint w, GLuint h, - GLubyte *dest, - GLuint row_align, - bool invert) -{ - GLuint src_offset = (x + unpack->SkipPixels) & 0x7; - GLuint mask = unpack->LsbFirst ? 0 : 7; - GLuint bit = 0; - GLint row, col; - GLint first, last; - GLint incr; - GLuint count = 0; - - DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", - __func__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); - - if (invert) { - first = h-1; - last = 0; - incr = -1; - } - else { - first = 0; - last = h-1; - incr = 1; - } - - /* Require that dest be pre-zero'd. 
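 *
 * (Editor's note, not part of the original source: the XOR masks used in
 * the loop below convert between GL's unpack bit order and the MSB-first
 * order the blitter consumes. With default unpack state, LsbFirst is
 * false and mask == 7, so a source bit is fetched as
 *
 *    test_bit(rowsrc, (col + src_offset) ^ 7)   // read MSB-first
 *
 * and set_bit(dest, bit ^ 7) mirrors each destination byte the same
 * way.)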
- */ - for (row = first; row != (last+incr); row += incr) { - const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, - width, height, - GL_COLOR_INDEX, GL_BITMAP, - y + row, x); - - for (col = 0; col < w; col++, bit++) { - if (test_bit(rowsrc, (col + src_offset) ^ mask)) { - set_bit(dest, bit ^ 7); - count++; - } - } - - if (row_align) - bit = ALIGN(bit, row_align); - } - - return count; -} - -/** - * Returns the low Y value of the vertical range given, flipped according to - * whether the framebuffer is flipped or not. - */ -static inline int -y_flip(struct gl_framebuffer *fb, int y, int height) -{ - if (fb->FlipY) - return fb->Height - y - height; - else - return y; -} - -/* - * Render a bitmap. - */ -static bool -do_blit_bitmap(struct gl_context *ctx, - GLint dstx, GLint dsty, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *irb; - GLfloat tmpColor[4]; - GLubyte ubcolor[4]; - GLuint color; - GLsizei bitmap_width = width; - GLsizei bitmap_height = height; - GLint px, py; - GLuint stipple[32]; - GLint orig_dstx = dstx; - GLint orig_dsty = dsty; - - /* Update draw buffer bounds */ - _mesa_update_state(ctx); - - if (ctx->Depth.Test) { - /* The blit path produces incorrect results when depth testing is on. - * It seems the blit Z coord is always 1.0 (the far plane) so fragments - * will likely be obscured by other, closer geometry. - */ - return false; - } - - brw_prepare_render(brw); - - if (fb->_NumColorDrawBuffers != 1) { - perf_debug("accelerated glBitmap() only supports rendering to a " - "single color buffer\n"); - return false; - } - - irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]); - - if (unpack->BufferObj) { - bitmap = map_pbo(ctx, width, height, unpack, bitmap); - if (bitmap == NULL) - return true; /* even though this is an error, we're done */ - } - - COPY_4V(tmpColor, ctx->Current.RasterColor); - - if (_mesa_need_secondary_color(ctx)) { - ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); - } - - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]); - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]); - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); - - switch (_mesa_get_render_format(ctx, brw_rb_format(irb))) { - case MESA_FORMAT_B8G8R8A8_UNORM: - case MESA_FORMAT_B8G8R8X8_UNORM: - color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]); - break; - case MESA_FORMAT_B5G6R5_UNORM: - color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]); - break; - default: - perf_debug("Unsupported format %s in accelerated glBitmap()\n", - _mesa_get_format_name(irb->mt->format)); - return false; - } - - if (!brw_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) - return false; - - /* Clip to buffer bounds and scissor. */ - if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, - fb->_Xmax, fb->_Ymax, - &dstx, &dsty, &width, &height)) - goto out; - - dsty = y_flip(fb, dsty, height); - -#define DY 32 -#define DX 32 - - /* The blitter has no idea about fast color clears, so we need to resolve - * the miptree before we do anything. 
- */ - brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, true); - - /* Chop it all into chunks that can be digested by hardware: */ - for (py = 0; py < height; py += DY) { - for (px = 0; px < width; px += DX) { - int h = MIN2(DY, height - py); - int w = MIN2(DX, width - px); - GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; - const enum gl_logicop_mode logic_op = ctx->Color.ColorLogicOpEnabled ? - ctx->Color._LogicOp : COLOR_LOGICOP_COPY; - - assert(sz <= sizeof(stipple)); - memset(stipple, 0, sz); - - /* May need to adjust this when padding has been introduced in - * sz above: - * - * Have to translate destination coordinates back into source - * coordinates. - */ - int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack, - bitmap, - -orig_dstx + (dstx + px), - -orig_dsty + y_flip(fb, dsty + py, h), - w, h, - (GLubyte *)stipple, - 8, - fb->FlipY); - if (count == 0) - continue; - - if (!brw_emit_immediate_color_expand_blit(brw, - irb->mt->cpp, - (GLubyte *)stipple, - sz, - color, - irb->mt->surf.row_pitch_B, - irb->mt->bo, - irb->mt->offset, - irb->mt->surf.tiling, - dstx + px, - dsty + py, - w, h, - logic_op)) { - return false; - } - - if (ctx->Query.CurrentOcclusionObject) - ctx->Query.CurrentOcclusionObject->Result += count; - } - } -out: - - if (INTEL_DEBUG(DEBUG_SYNC)) - brw_batch_flush(brw); - - if (unpack->BufferObj) { - /* done with PBO so unmap it now */ - ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj, MAP_INTERNAL); - } - - return true; -} - - -/* There are a large number of possible ways to implement bitmap on - * this hardware, most of them have some sort of drawback. Here are a - * few that spring to mind: - * - * Blit: - * - XY_MONO_SRC_BLT_CMD - * - use XY_SETUP_CLIP_BLT for cliprect clipping. - * - XY_TEXT_BLT - * - XY_TEXT_IMMEDIATE_BLT - * - blit per cliprect, subject to maximum immediate data size. - * - XY_COLOR_BLT - * - per pixel or run of pixels - * - XY_PIXEL_BLT - * - good for sparse bitmaps - * - * 3D engine: - * - Point per pixel - * - Translate bitmap to an alpha texture and render as a quad - * - Chop bitmap up into 32x32 squares and render w/polygon stipple. - */ -void -brw_bitmap(struct gl_context * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte * pixels) -{ - struct brw_context *brw = brw_context(ctx); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (brw->screen->devinfo.ver < 6 && - do_blit_bitmap(ctx, x, y, width, height, unpack, pixels)) - return; - - _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels); -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel_copy.c b/src/mesa/drivers/dri/i965/brw_pixel_copy.c deleted file mode 100644 index 5527ffb..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_copy.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/image.h" -#include "main/state.h" -#include "main/stencil.h" -#include "main/mtypes.h" -#include "main/condrender.h" -#include "main/fbobject.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_buffers.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_fbo.h" -#include "brw_blit.h" -#include "brw_batch.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -/** - * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. - */ -static bool -do_blit_copypixels(struct gl_context * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct gl_framebuffer *read_fb = ctx->ReadBuffer; - GLint orig_dstx; - GLint orig_dsty; - GLint orig_srcx; - GLint orig_srcy; - struct brw_renderbuffer *draw_irb = NULL; - struct brw_renderbuffer *read_irb = NULL; - - /* Update draw buffer bounds */ - _mesa_update_state(ctx); - - brw_prepare_render(brw); - - switch (type) { - case GL_COLOR: - if (fb->_NumColorDrawBuffers != 1) { - perf_debug("glCopyPixels() fallback: MRT\n"); - return false; - } - - draw_irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]); - read_irb = brw_renderbuffer(read_fb->_ColorReadBuffer); - break; - case GL_DEPTH_STENCIL_EXT: - draw_irb = brw_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); - read_irb = - brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); - break; - case GL_DEPTH: - perf_debug("glCopyPixels() fallback: GL_DEPTH\n"); - return false; - case GL_STENCIL: - perf_debug("glCopyPixels() fallback: GL_STENCIL\n"); - return false; - default: - perf_debug("glCopyPixels(): Unknown type\n"); - return false; - } - - if (!draw_irb) { - perf_debug("glCopyPixels() fallback: missing draw buffer\n"); - return false; - } - - if (!read_irb) { - perf_debug("glCopyPixels() fallback: missing read buffer\n"); - return false; - } - - if (draw_irb->mt->surf.samples > 1 || read_irb->mt->surf.samples > 1) { - perf_debug("glCopyPixels() fallback: multisampled buffers\n"); - return false; - } - - if (ctx->_ImageTransferState) { - perf_debug("glCopyPixels(): Unsupported image transfer state\n"); - return false; - } - - if (ctx->Depth.Test) { - perf_debug("glCopyPixels(): Unsupported depth test state\n"); - return false; - } - - if (brw->stencil_enabled) { - perf_debug("glCopyPixels(): Unsupported stencil test state\n"); - return false; - } - - if (ctx->Fog.Enabled || - ctx->Texture._MaxEnabledTexImageUnit != -1 || - _mesa_arb_fragment_program_enabled(ctx)) { - perf_debug("glCopyPixels(): Unsupported fragment shader state\n"); - return false; - } - - if (ctx->Color.AlphaEnabled || - ctx->Color.BlendEnabled) { - perf_debug("glCopyPixels(): Unsupported blend state\n"); - return false; - } - - if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) { - perf_debug("glCopyPixels(): Unsupported color mask state\n"); - return false; - } - - if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { 
- perf_debug("glCopyPixels(): Unsupported pixel zoom\n"); - return false; - } - - brw_batch_flush(brw); - - /* Clip to destination buffer. */ - orig_dstx = dstx; - orig_dsty = dsty; - if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, - fb->_Xmax, fb->_Ymax, - &dstx, &dsty, &width, &height)) - goto out; - /* Adjust src coords for our post-clipped destination origin */ - srcx += dstx - orig_dstx; - srcy += dsty - orig_dsty; - - /* Clip to source buffer. */ - orig_srcx = srcx; - orig_srcy = srcy; - if (!_mesa_clip_to_region(0, 0, - read_fb->Width, read_fb->Height, - &srcx, &srcy, &width, &height)) - goto out; - /* Adjust dst coords for our post-clipped source origin */ - dstx += srcx - orig_srcx; - dsty += srcy - orig_srcy; - - if (!brw_miptree_blit(brw, - read_irb->mt, read_irb->mt_level, read_irb->mt_layer, - srcx, srcy, read_fb->FlipY, - draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer, - dstx, dsty, fb->FlipY, - width, height, - (ctx->Color.ColorLogicOpEnabled ? - ctx->Color._LogicOp : COLOR_LOGICOP_COPY))) { - DBG("%s: blit failure\n", __func__); - return false; - } - - if (ctx->Query.CurrentOcclusionObject) - ctx->Query.CurrentOcclusionObject->Result += width * height; - -out: - - DBG("%s: success\n", __func__); - return true; -} - - -void -brw_copypixels(struct gl_context *ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type) -{ - struct brw_context *brw = brw_context(ctx); - - DBG("%s\n", __func__); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (brw->screen->devinfo.ver < 6 && - do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; - - /* this will use swrast if needed */ - _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel_draw.c b/src/mesa/drivers/dri/i965/brw_pixel_draw.c deleted file mode 100644 index aaf81f4..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_draw.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portionsalloc - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/enums.h" -#include "main/image.h" -#include "main/glformats.h" -#include "main/mtypes.h" -#include "main/condrender.h" -#include "main/fbobject.h" -#include "main/teximage.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/bufferobj.h" -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_screen.h" -#include "brw_blit.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_buffer_objects.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -static bool -do_blit_drawpixels(struct gl_context * ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid * pixels) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *src = brw_buffer_object(unpack->BufferObj); - GLuint src_offset; - struct brw_bo *src_buffer; - - DBG("%s\n", __func__); - - if (!brw_check_blit_fragment_ops(ctx, false)) - return false; - - if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { - DBG("%s: fallback due to MRT\n", __func__); - return false; - } - - brw_prepare_render(brw); - - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - mesa_format src_format = _mesa_format_from_format_and_type(format, type); - if (_mesa_format_is_mesa_array_format(src_format)) - src_format = _mesa_format_from_array_format(src_format); - mesa_format dst_format = irb->mt->format; - - /* We can safely discard sRGB encode/decode for the DrawPixels interface */ - src_format = _mesa_get_srgb_format_linear(src_format); - dst_format = _mesa_get_srgb_format_linear(dst_format); - - if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) { - DBG("%s: bad format for blit\n", __func__); - return false; - } - - if (unpack->SwapBytes || unpack->LsbFirst || - unpack->SkipPixels || unpack->SkipRows) { - DBG("%s: bad packing params\n", __func__); - return false; - } - - int src_stride = _mesa_image_row_stride(unpack, width, format, type); - bool src_flip = false; - /* Mesa flips the src_stride for unpack->Invert, but we want our mt to have - * a normal src_stride. 
- */ - if (unpack->Invert) { - src_stride = -src_stride; - src_flip = true; - } - - src_offset = (GLintptr)pixels; - src_offset += _mesa_image_offset(2, unpack, width, height, - format, type, 0, 0, 0); - - src_buffer = brw_bufferobj_buffer(brw, src, src_offset, - height * src_stride, false); - - struct brw_mipmap_tree *pbo_mt = - brw_miptree_create_for_bo(brw, - src_buffer, - irb->mt->format, - src_offset, - width, height, 1, - src_stride, - ISL_TILING_LINEAR, - MIPTREE_CREATE_DEFAULT); - if (!pbo_mt) - return false; - - if (!brw_miptree_blit(brw, - pbo_mt, 0, 0, - 0, 0, src_flip, - irb->mt, irb->mt_level, irb->mt_layer, - x, y, ctx->DrawBuffer->FlipY, - width, height, COLOR_LOGICOP_COPY)) { - DBG("%s: blit failed\n", __func__); - brw_miptree_release(&pbo_mt); - return false; - } - - brw_miptree_release(&pbo_mt); - - if (ctx->Query.CurrentOcclusionObject) - ctx->Query.CurrentOcclusionObject->Result += width * height; - - DBG("%s: success\n", __func__); - return true; -} - -void -brw_drawpixels(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, - GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) -{ - struct brw_context *brw = brw_context(ctx); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (format == GL_STENCIL_INDEX) { - _swrast_DrawPixels(ctx, x, y, width, height, format, type, - unpack, pixels); - return; - } - - if (brw->screen->devinfo.ver < 6 && - unpack->BufferObj) { - if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack, - pixels)) { - return; - } - - perf_debug("%s: fallback to generic code in PBO case\n", __func__); - } - - _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, - unpack, pixels); -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel_read.c b/src/mesa/drivers/dri/i965/brw_pixel_read.c deleted file mode 100644 index ad0ee97..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_read.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/enums.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/fbobject.h" -#include "main/image.h" -#include "main/bufferobj.h" -#include "main/readpix.h" -#include "main/state.h" -#include "main/glformats.h" -#include "program/prog_instruction.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_blorp.h" -#include "brw_screen.h" -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_buffer_objects.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -/** - * \brief A fast path for glReadPixels - * - * This fast path is taken when the source format is BGRA, RGBA, - * A or L and when the texture memory is X- or Y-tiled. It downloads - * the source data by directly mapping the memory without a GTT fence. - * This then needs to be de-tiled on the CPU before presenting the data to - * the user in the linear fasion. - * - * This is a performance win over the conventional texture download path. - * In the conventional texture download path, the texture is either mapped - * through the GTT or copied to a linear buffer with the blitter before - * handing off to a software path. This allows us to avoid round-tripping - * through the GPU (in the case where we would be blitting) and do only a - * single copy operation. - */ -static bool -brw_readpixels_tiled_memcpy(struct gl_context *ctx, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - GLvoid * pixels, - const struct gl_pixelstore_attrib *pack) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* This path supports reading from color buffers only */ - if (rb == NULL) - return false; - - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - int dst_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - isl_memcpy_type copy_type; - - /* This fastpath is restricted to specific renderbuffer types: - * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support - * more types. - */ - if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || - pixels == NULL || - pack->BufferObj || - pack->Alignment > 4 || - pack->SkipPixels > 0 || - pack->SkipRows > 0 || - (pack->RowLength != 0 && pack->RowLength != width) || - pack->SwapBytes || - pack->LsbFirst || - pack->Invert) - return false; - - /* Only a simple blit, no scale, bias or other mapping. */ - if (ctx->_ImageTransferState) - return false; - - /* It is possible that the renderbuffer (or underlying texture) is - * multisampled. Since ReadPixels from a multisampled buffer requires a - * multisample resolve, we can't handle this here - */ - if (rb->NumSamples > 1) - return false; - - /* We can't handle copying from RGBX or BGRX because the tiled_memcpy - * function doesn't set the last channel to 1. Note this checks BaseFormat - * rather than TexFormat in case the RGBX format is being simulated with an - * RGBA format. - */ - if (rb->_BaseFormat == GL_RGB) - return false; - - copy_type = brw_miptree_get_memcpy_type(rb->Format, format, type, &cpp); - if (copy_type == ISL_MEMCPY_INVALID) - return false; - - if (!irb->mt || - (irb->mt->surf.tiling != ISL_TILING_X && - irb->mt->surf.tiling != ISL_TILING_Y0)) { - /* The algorithm is written only for X- or Y-tiled memory. 
*/ - return false; - } - - /* tiled_to_linear() assumes that if the object is swizzled, it is using - * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only - * true on gfx5 and above. - * - * The killer on top is that some gfx4 have an L-shaped swizzle mode, where - * parts of the memory aren't swizzled at all. Userspace just can't handle - * that. - */ - if (devinfo->ver < 5 && devinfo->has_bit6_swizzle) - return false; - - /* Since we are going to read raw data to the miptree, we need to resolve - * any pending fast color clears before we start. - */ - brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false); - - bo = irb->mt->bo; - - if (brw_batch_references(&brw->batch, bo)) { - perf_debug("Flushing before mapping a referenced bo.\n"); - brw_batch_flush(brw); - } - - void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW); - if (map == NULL) { - DBG("%s: failed to map bo\n", __func__); - return false; - } - - unsigned slice_offset_x, slice_offset_y; - brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer, - &slice_offset_x, &slice_offset_y); - xoffset += slice_offset_x; - yoffset += slice_offset_y; - - dst_pitch = _mesa_image_row_stride(pack, width, format, type); - - /* For a window-system renderbuffer, the buffer is actually flipped - * vertically, so we need to handle that. Since the detiling function - * can only really work in the forwards direction, we have to be a - * little creative. First, we compute the Y-offset of the first row of - * the renderbuffer (in renderbuffer coordinates). We then match that - * with the last row of the client's data. Finally, we give - * tiled_to_linear a negative pitch so that it walks through the - * client's data backwards as it walks through the renderbuffer forwards. - */ - if (ctx->ReadBuffer->FlipY) { - yoffset = rb->Height - yoffset - height; - pixels += (ptrdiff_t) (height - 1) * dst_pitch; - dst_pitch = -dst_pitch; - } - - /* We postponed printing this message until having committed to executing - * the function. - */ - DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " - "mesa_format=0x%x tiling=%d " - "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", - __func__, xoffset, yoffset, width, height, - format, type, rb->Format, irb->mt->surf.tiling, - pack->Alignment, pack->RowLength, pack->SkipPixels, - pack->SkipRows); - - isl_memcpy_tiled_to_linear( - xoffset * cpp, (xoffset + width) * cpp, - yoffset, yoffset + height, - pixels, - map + irb->mt->offset, - dst_pitch, irb->mt->surf.row_pitch_B, - devinfo->has_bit6_swizzle, - irb->mt->surf.tiling, - copy_type - ); - - brw_bo_unmap(bo); - return true; -} - -static bool -brw_readpixels_blorp(struct gl_context *ctx, - unsigned x, unsigned y, - unsigned w, unsigned h, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; - if (!rb) - return false; - - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - /* _mesa_get_readpixels_transfer_ops() includes the cases of read - * color clamping along with the ctx->_ImageTransferState.
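The FlipY handling above feeds the detiler a negative destination pitch so that a forward walk over the source fills the destination bottom-up. The same trick in isolation, a minimal sketch with hypothetical names:

#include <stdint.h>
#include <string.h>

/* Write a vertically flipped copy with a forward-walking loop: point the
 * destination at its last row and step by a negative pitch, so source
 * row 0 lands on destination row height-1. */
static void
copy_rows_flipped(char *dst, const char *src,
                  uint32_t width_bytes, uint32_t height, uint32_t src_pitch)
{
   int32_t dst_pitch = -(int32_t)width_bytes;
   char *out = dst + (size_t)(height - 1) * width_bytes;

   for (uint32_t y = 0; y < height; y++) {
      memcpy(out, src + (size_t)y * src_pitch, width_bytes);
      out += dst_pitch;   /* negative: moves one row up */
   }
}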
- */ - if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, - type, GL_FALSE)) - return false; - - GLenum dst_base_format = _mesa_unpack_format_to_base_format(format); - if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, - dst_base_format)) - return false; - - unsigned swizzle; - if (irb->Base.Base._BaseFormat == GL_RGB) { - swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - } else { - swizzle = SWIZZLE_XYZW; - } - - return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle, - irb->mt_level, x, y, irb->mt_layer, - w, h, 1, GL_TEXTURE_2D, format, type, - ctx->ReadBuffer->FlipY, pixels, packing); -} - -void -brw_readpixels(struct gl_context *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, GLvoid *pixels) -{ - bool ok; - - struct brw_context *brw = brw_context(ctx); - bool dirty; - - DBG("%s\n", __func__); - - /* Reading pixels won't dirty the front buffer, so reset the dirty - * flag after calling brw_prepare_render(). - */ - dirty = brw->front_buffer_dirty; - brw_prepare_render(brw); - brw->front_buffer_dirty = dirty; - - if (pack->BufferObj) { - if (brw_readpixels_blorp(ctx, x, y, width, height, - format, type, pixels, pack)) - return; - - perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); - } - - ok = brw_readpixels_tiled_memcpy(ctx, x, y, width, height, - format, type, pixels, pack); - if (ok) - return; - - /* Update Mesa state before calling _mesa_readpixels(). - * XXX this may not be needed since ReadPixels no longer uses the - * span code. - */ - - if (ctx->NewState) - _mesa_update_state(ctx); - - _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); - - /* There's a brw_prepare_render() call in intelSpanRenderStart(). */ - brw->front_buffer_dirty = dirty; -} diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c deleted file mode 100644 index 21f1c79..0000000 --- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE.
- * - * Authors: - * Jordan Justen - * - */ - -#include "main/bufferobj.h" -#include "main/varray.h" -#include "vbo/vbo.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_draw.h" - -#include "brw_batch.h" - - -#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b)) -#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b)) - -/* - * Notes on primitive restart: - * The code below is used when the driver does not fully support primitive - * restart (for example, if it only does restart index of ~0). - * - * We map the index buffer, find the restart indexes, unmap - * the index buffer then draw the sub-primitives delineated by the restarts. - * - * A couple of possible optimizations: - * 1. Save the list of sub-primitive (start, count) values in a list attached - * to the index buffer for re-use in subsequent draws. The list would be - * invalidated when the contents of the buffer changed. - * 2. If drawing triangle strips or quad strips, create a new index buffer - * that uses duplicated vertices to render the disjoint strips as one - * long strip. We'd have to be careful to avoid using too much memory - * for this. - * - * Finally, some apps might perform better if they don't use primitive restart - * at all rather than this fallback path. Set MESA_EXTENSION_OVERRIDE to - * "-GL_NV_primitive_restart" to test that. - */ - - -struct sub_primitive -{ - GLuint start; - GLuint count; - GLuint min_index; - GLuint max_index; -}; - - -/** - * Scan the elements array to find restart indexes. Return an array - * of struct sub_primitive indicating how to draw the sub-primitives - * delineated by the restart index. - */ -static struct sub_primitive * -find_sub_primitives(const void *elements, unsigned element_size, - unsigned start, unsigned end, unsigned restart_index, - unsigned *num_sub_prims) -{ - const unsigned max_prims = end - start; - struct sub_primitive *sub_prims; - unsigned i, cur_start, cur_count; - GLuint scan_index; - unsigned scan_num; - - sub_prims = - malloc(max_prims * sizeof(struct sub_primitive)); - - if (!sub_prims) { - *num_sub_prims = 0; - return NULL; - } - - cur_start = start; - cur_count = 0; - scan_num = 0; - -#define IB_INDEX_READ(TYPE, INDEX) (((const GL##TYPE *) elements)[INDEX]) - -#define SCAN_ELEMENTS(TYPE) \ - sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \ - sub_prims[scan_num].max_index = 0; \ - for (i = start; i < end; i++) { \ - scan_index = IB_INDEX_READ(TYPE, i); \ - if (scan_index == restart_index) { \ - if (cur_count > 0) { \ - assert(scan_num < max_prims); \ - sub_prims[scan_num].start = cur_start; \ - sub_prims[scan_num].count = cur_count; \ - scan_num++; \ - sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \ - sub_prims[scan_num].max_index = 0; \ - } \ - cur_start = i + 1; \ - cur_count = 0; \ - } \ - else { \ - UPDATE_MIN2(sub_prims[scan_num].min_index, scan_index); \ - UPDATE_MAX2(sub_prims[scan_num].max_index, scan_index); \ - cur_count++; \ - } \ - } \ - if (cur_count > 0) { \ - assert(scan_num < max_prims); \ - sub_prims[scan_num].start = cur_start; \ - sub_prims[scan_num].count = cur_count; \ - scan_num++; \ - } - - switch (element_size) { - case 1: - SCAN_ELEMENTS(ubyte); - break; - case 2: - SCAN_ELEMENTS(ushort); - break; - case 4: - SCAN_ELEMENTS(uint); - break; - default: - assert(0 && "bad index_size in find_sub_primitives()"); - } - -#undef SCAN_ELEMENTS - - *num_sub_prims = scan_num; - - return sub_prims; -} - - -/** - * Handle primitive restart in software.
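find_sub_primitives() above is a type-generic scan. Its core idea, shown here for 32-bit indices only and with illustrative names, is a single pass that closes the current run at each restart index:

#include <stdbool.h>
#include <stdint.h>

struct sub_range { unsigned start, count; };

/* Split an index stream on a restart value. Returns how many sub-ranges
 * were written to out[] (at most max_out). A simplified, uint32-only
 * version of the scan performed above. */
static unsigned
split_on_restart(const uint32_t *indices, unsigned count,
                 uint32_t restart_index,
                 struct sub_range *out, unsigned max_out)
{
   unsigned n = 0, run_start = 0;

   for (unsigned i = 0; i <= count; i++) {
      /* The i == count case flushes the final run. */
      bool end_of_run = (i == count) || (indices[i] == restart_index);
      if (end_of_run) {
         if (i > run_start && n < max_out) {
            out[n].start = run_start;
            out[n].count = i - run_start;
            n++;
         }
         run_start = i + 1;
      }
   }
   return n;
}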
- * - * This function breaks up calls into the driver so primitive restart - * support is not required in the driver. - */ -static void -vbo_sw_primitive_restart_common_start(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, - GLuint base_instance, - struct gl_buffer_object *indirect, - GLsizeiptr indirect_offset, - bool primitive_restart, - unsigned restart_index) -{ - GLuint prim_num; - struct _mesa_prim new_prim; - struct _mesa_index_buffer new_ib; - struct sub_primitive *sub_prims; - struct sub_primitive *sub_prim; - GLuint num_sub_prims; - GLuint sub_prim_num; - GLuint end_index; - GLuint sub_end_index; - struct _mesa_prim temp_prim; - GLboolean map_ib = ib->obj && !ib->obj->Mappings[MAP_INTERNAL].Pointer; - const void *ptr; - - /* If there is an indirect buffer, map it and extract the draw params */ - if (indirect) { - const uint32_t *indirect_params; - if (!ctx->Driver.MapBufferRange(ctx, 0, indirect->Size, GL_MAP_READ_BIT, - indirect, MAP_INTERNAL)) { - - /* something went wrong with mapping, give up */ - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "failed to map indirect buffer for sw primitive restart"); - return; - } - - assert(nr_prims == 1); - new_prim = prims[0]; - indirect_params = (const uint32_t *) - ADD_POINTERS(indirect->Mappings[MAP_INTERNAL].Pointer, - indirect_offset); - - new_prim.count = indirect_params[0]; - new_prim.start = indirect_params[2]; - new_prim.basevertex = indirect_params[3]; - - num_instances = indirect_params[1]; - base_instance = indirect_params[4]; - - new_ib = *ib; - new_ib.count = new_prim.count; - - prims = &new_prim; - ib = &new_ib; - - ctx->Driver.UnmapBuffer(ctx, indirect, MAP_INTERNAL); - } - - /* Find the sub-primitives. These are regions in the index buffer which - * are split based on the primitive restart index value. - */ - if (map_ib) { - ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT, - ib->obj, MAP_INTERNAL); - } - - if (ib->obj) - ptr = ADD_POINTERS(ib->obj->Mappings[MAP_INTERNAL].Pointer, ib->ptr); - else - ptr = ib->ptr; - - sub_prims = find_sub_primitives(ptr, 1 << ib->index_size_shift, - prims[0].start, prims[0].start + ib->count, - restart_index, &num_sub_prims); - - if (map_ib) { - ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL); - } - - /* Loop over the primitives, and use the located sub-primitives to draw - * each primitive with a break to implement each primitive restart. - */ - for (prim_num = 0; prim_num < nr_prims; prim_num++) { - end_index = prims[prim_num].start + prims[prim_num].count; - memcpy(&temp_prim, &prims[prim_num], sizeof (temp_prim)); - /* Loop over the sub-primitives drawing sub-ranges of the primitive. 
*/ - for (sub_prim_num = 0; sub_prim_num < num_sub_prims; sub_prim_num++) { - sub_prim = &sub_prims[sub_prim_num]; - sub_end_index = sub_prim->start + sub_prim->count; - if (prims[prim_num].start <= sub_prim->start) { - temp_prim.start = MAX2(prims[prim_num].start, sub_prim->start); - temp_prim.count = MIN2(sub_end_index, end_index) - temp_prim.start; - if ((temp_prim.start == sub_prim->start) && - (temp_prim.count == sub_prim->count)) { - ctx->Driver.Draw(ctx, &temp_prim, 1, ib, true, false, 0, - sub_prim->min_index, sub_prim->max_index, - num_instances, base_instance); - } else { - ctx->Driver.Draw(ctx, &temp_prim, 1, ib, - false, false, 0, -1, -1, - num_instances, base_instance); - } - } - if (sub_end_index >= end_index) { - break; - } - } - } - - free(sub_prims); -} - -static void -vbo_sw_primitive_restart(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, - GLuint base_instance, - struct gl_buffer_object *indirect, - GLsizeiptr indirect_offset, - bool primitive_restart, - unsigned restart_index) -{ - unsigned i; - for (i = 1; i < nr_prims; i++) { - if (prims[i].start != prims[0].start) - break; - } - - vbo_sw_primitive_restart_common_start(ctx, &prims[0], i, ib, - num_instances, base_instance, - indirect, indirect_offset, - primitive_restart, - restart_index); - if (i != nr_prims) { - vbo_sw_primitive_restart(ctx, &prims[i], nr_prims - i, ib, - num_instances, base_instance, - indirect, indirect_offset, - primitive_restart, - restart_index); - } -} - -/** - * Check if the hardware's cut index support can handle the primitive - * restart index value (pre-Haswell only). - */ -static bool -can_cut_index_handle_restart_index(struct gl_context *ctx, - const struct _mesa_index_buffer *ib, - unsigned restart_index) -{ - /* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on - * the index buffer type, which corresponds exactly to the hardware. - */ - if (ctx->Array.PrimitiveRestartFixedIndex) - return true; - - bool cut_index_will_work; - - switch (ib->index_size_shift) { - case 0: - cut_index_will_work = restart_index == 0xff; - break; - case 1: - cut_index_will_work = restart_index == 0xffff; - break; - case 2: - cut_index_will_work = restart_index == 0xffffffff; - break; - default: - unreachable("not reached"); - } - - return cut_index_will_work; -} - -/** - * Check if the hardware's cut index support can handle the primitive - * restart case. - */ -static bool -can_cut_index_handle_prims(struct gl_context *ctx, - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - unsigned restart_index) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Otherwise Haswell can do it all. 
*/ - if (devinfo->verx10 >= 75) - return true; - - if (!can_cut_index_handle_restart_index(ctx, ib, restart_index)) { - /* The primitive restart index can't be handled, so take - * the software path - */ - return false; - } - - for (unsigned i = 0; i < nr_prims; i++) { - switch (prim[i].mode) { - case GL_POINTS: - case GL_LINES: - case GL_LINE_STRIP: - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: - /* Cut index supports these primitive types */ - break; - default: - /* Cut index does not support these primitive types */ - //case GL_LINE_LOOP: - //case GL_TRIANGLE_FAN: - //case GL_QUADS: - //case GL_QUAD_STRIP: - //case GL_POLYGON: - return false; - } - } - - return true; -} - -/** - * Check if primitive restart is enabled, and if so, handle it properly. - * - * In some cases the support will be handled in software. When available - * hardware will handle primitive restart. - */ -GLboolean -brw_handle_primitive_restart(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, GLuint base_instance, - bool primitive_restart, - unsigned restart_index) -{ - struct brw_context *brw = brw_context(ctx); - - /* We only need to handle cases where there is an index buffer. */ - if (ib == NULL) { - return GL_FALSE; - } - - /* If we have set the in_progress flag, then we are in the middle - * of handling the primitive restart draw. - */ - if (brw->prim_restart.in_progress) { - return GL_FALSE; - } - - /* If PrimitiveRestart is not enabled, then we aren't concerned about - * handling this draw. - */ - if (!primitive_restart) { - return GL_FALSE; - } - - /* Signal that we are in the process of handling the - * primitive restart draw - */ - brw->prim_restart.in_progress = true; - - if (can_cut_index_handle_prims(ctx, prims, nr_prims, ib, restart_index)) { - /* Cut index should work for primitive restart, so use it - */ - brw->prim_restart.enable_cut_index = true; - brw->prim_restart.restart_index = restart_index; - brw_draw_prims(ctx, prims, nr_prims, ib, false, primitive_restart, - restart_index, -1, -1, - num_instances, base_instance); - brw->prim_restart.enable_cut_index = false; - } else { - /* Not all the primitive draw modes are supported by the cut index, - * so take the software path - */ - struct gl_buffer_object *indirect_data = brw->draw.draw_indirect_data; - - /* Clear this to make the draw direct. */ - brw->draw.draw_indirect_data = NULL; - - vbo_sw_primitive_restart(ctx, prims, nr_prims, ib, num_instances, - base_instance, indirect_data, - brw->draw.draw_indirect_offset, - primitive_restart, restart_index); - } - - brw->prim_restart.in_progress = false; - - /* The primitive restart draw was completed, so return true. */ - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c deleted file mode 100644 index cbce06c..0000000 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ /dev/null @@ -1,888 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
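can_cut_index_handle_restart_index() above accepts only the all-ones value for the bound index type on pre-Haswell parts. That check reduces to a few lines; a sketch with a hypothetical helper name, keeping the index_size_shift convention (log2 of the index size in bytes) used above:

#include <stdbool.h>

/* Pre-Haswell hardware only honors the "all ones" cut index for the
 * index type in use; check that the app's restart index matches. */
static bool
restart_index_is_all_ones(unsigned index_size_shift, unsigned restart_index)
{
   unsigned index_bits = 8u << index_size_shift;   /* 8, 16 or 32 */
   unsigned all_ones = index_bits == 32 ? 0xffffffffu
                                        : (1u << index_bits) - 1;
   return restart_index == all_ones;
}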
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include -#include "main/glspirv.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_to_nir.h" -#include "program/program.h" -#include "program/programopt.h" -#include "tnl/tnl.h" -#include "util/ralloc.h" -#include "compiler/glsl/ir.h" -#include "compiler/glsl/program.h" -#include "compiler/glsl/gl_nir.h" -#include "compiler/glsl/glsl_to_nir.h" - -#include "brw_program.h" -#include "brw_context.h" -#include "compiler/brw_nir.h" -#include "brw_defines.h" -#include "brw_batch.h" - -#include "brw_cs.h" -#include "brw_gs.h" -#include "brw_vs.h" -#include "brw_wm.h" -#include "brw_state.h" - -#include "main/shaderapi.h" -#include "main/shaderobj.h" - -static bool -brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) -{ - if (is_scalar) { - nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms, - type_size_scalar_bytes); - return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0); - } else { - nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms, - type_size_vec4_bytes); - return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0); - } -} - -static struct gl_program *brw_new_program(struct gl_context *ctx, - gl_shader_stage stage, - GLuint id, bool is_arb_asm); - -nir_shader * -brw_create_nir(struct brw_context *brw, - const struct gl_shader_program *shader_prog, - struct gl_program *prog, - gl_shader_stage stage, - bool is_scalar) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - nir_shader *nir; - - /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */ - if (shader_prog) { - if (shader_prog->data->spirv) { - nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options); - } else { - nir = glsl_to_nir(ctx, shader_prog, stage, options); - - /* Remap the locations to slots so those requiring two slots will - * occupy two locations. 
For instance, if we have in the IR code a - * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0 - * will use locations/slots 0 and 1, and attr1 will use location/slot 2 - */ - if (nir->info.stage == MESA_SHADER_VERTEX) - nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs); - } - assert (nir); - - nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out, - NULL); - nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir"); - NIR_PASS_V(nir, nir_lower_io_to_temporaries, - nir_shader_get_entrypoint(nir), true, false); - } else { - nir = prog_to_nir(prog, options); - NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */ - } - nir_validate_shader(nir, "before brw_preprocess_nir"); - - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) && - (options->lower_doubles_options & nir_lower_fp64_full_software)) { - ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options); - } - - brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64); - - if (stage == MESA_SHADER_TESS_CTRL) { - /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */ - static const gl_state_index16 tokens[STATE_LENGTH] = - { STATE_TCS_PATCH_VERTICES_IN }; - nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL); - } - - if (stage == MESA_SHADER_TESS_EVAL) { - /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or - * a uniform if we don't. - */ - struct gl_linked_shader *tcs = - shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; - uint32_t static_patch_vertices = - tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0; - static const gl_state_index16 tokens[STATE_LENGTH] = - { STATE_TES_PATCH_VERTICES_IN }; - nir_lower_patch_vertices(nir, static_patch_vertices, tokens); - } - - if (stage == MESA_SHADER_FRAGMENT) { - static const struct nir_lower_wpos_ytransform_options wpos_options = { - .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0}, - .fs_coord_pixel_center_integer = 1, - .fs_coord_origin_upper_left = 1, - }; - - bool progress = false; - NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options); - if (progress) { - _mesa_add_state_reference(prog->Parameters, - wpos_options.state_tokens); - } - } - - return nir; -} - -static void -shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) -{ - assert(glsl_type_is_vector_or_scalar(type)); - - uint32_t comp_size = glsl_type_is_boolean(type) - ? 4 : glsl_get_bit_size(type) / 8; - unsigned length = glsl_get_vector_elements(type); - *size = comp_size * length, - *align = comp_size * (length == 3 ? 
4 : length); -} - -void -brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog, - struct gl_program *prog, - const struct intel_device_info *devinfo) -{ - NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar); - NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog); - BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used); - BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf); - - NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo); - - if (prog->nir->info.stage == MESA_SHADER_COMPUTE && - shader_prog->data->spirv) { - NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types, - nir_var_mem_shared, shared_type_info); - NIR_PASS_V(prog->nir, nir_lower_explicit_io, - nir_var_mem_shared, nir_address_format_32bit_offset); - } - - NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog); - /* Do a round of constant folding to clean up address calculations */ - NIR_PASS_V(prog->nir, nir_opt_constant_folding); -} - -void -brw_shader_gather_info(nir_shader *nir, struct gl_program *prog) -{ - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - /* Copy the info we just generated back into the gl_program */ - const char *prog_name = prog->info.name; - const char *prog_label = prog->info.label; - prog->info = nir->info; - prog->info.name = prog_name; - prog->info.label = prog_label; -} - -static unsigned -get_new_program_id(struct brw_screen *screen) -{ - return p_atomic_inc_return(&screen->program_id); -} - -static struct gl_program * -brw_new_program(struct gl_context *ctx, - gl_shader_stage stage, - GLuint id, bool is_arb_asm) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_program *prog = rzalloc(NULL, struct brw_program); - - if (prog) { - prog->id = get_new_program_id(brw->screen); - - return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm); - } - - return NULL; -} - -static void -brw_delete_program(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - - /* Beware! prog's refcount has reached zero, and it's about to be freed. - * - * In brw_upload_pipeline_state(), we compare brw->programs[i] to - * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the - * pointer has changed. - * - * We cannot leave brw->programs[i] as a dangling pointer to the dead - * program. malloc() may allocate the same memory for a new gl_program, - * causing us to see matching pointers...but totally different programs. - * - * We cannot set brw->programs[i] to NULL, either. If we've deleted the - * active program, Mesa may set ctx->FooProgram._Current to NULL. That - * would cause us to see matching pointers (NULL == NULL), and fail to - * detect that a program has changed since our last draw. - * - * So, set it to a bogus gl_program pointer that will never match, - * causing us to properly reevaluate the state on our next draw. - * - * Getting this wrong causes heisenbugs which are very hard to catch, - * as you need a very specific allocation pattern to hit the problem. 
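The long comment above describes a sentinel-pointer pattern for safely forgetting a freed program. Reduced to its essentials, with illustrative names rather than the driver's:

#include <stddef.h>

struct program { int id; };

#define NUM_STAGES 6

/* A static sentinel that can never alias a live allocation or NULL.
 * Stale slots keep pointing at it until the next state upload, which
 * then sees "pointer changed" and re-evaluates, never a false match. */
static const struct program deleted_program;

static void
forget_program(struct program *bound[NUM_STAGES], struct program *dying)
{
   for (int i = 0; i < NUM_STAGES; i++) {
      if (bound[i] == dying)
         bound[i] = (struct program *) &deleted_program;
   }
}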
- */ - static const struct gl_program deleted_program; - - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (brw->programs[i] == prog) - brw->programs[i] = (struct gl_program *) &deleted_program; - } - - _mesa_delete_program( ctx, prog ); -} - - -static GLboolean -brw_program_string_notify(struct gl_context *ctx, - GLenum target, - struct gl_program *prog) -{ - assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant); - - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - - switch (target) { - case GL_FRAGMENT_PROGRAM_ARB: { - struct brw_program *newFP = brw_program(prog); - const struct brw_program *curFP = - brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]); - - if (newFP == curFP) - brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; - _mesa_program_fragment_position_to_sysval(&newFP->program); - newFP->id = get_new_program_id(brw->screen); - - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true); - - brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo); - - brw_shader_gather_info(prog->nir, prog); - - brw_fs_precompile(ctx, prog); - break; - } - case GL_VERTEX_PROGRAM_ARB: { - struct brw_program *newVP = brw_program(prog); - const struct brw_program *curVP = - brw_program_const(brw->programs[MESA_SHADER_VERTEX]); - - if (newVP == curVP) - brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM; - if (newVP->program.arb.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &newVP->program); - } - newVP->id = get_new_program_id(brw->screen); - - /* Also tell tnl about it: - */ - _tnl_program_string(ctx, target, prog); - - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, - compiler->scalar_stage[MESA_SHADER_VERTEX]); - - brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo); - - brw_shader_gather_info(prog->nir, prog); - - brw_vs_precompile(ctx, prog); - break; - } - default: - /* - * driver->ProgramStringNotify is only called for ARB programs, fixed - * function vertex programs, and ir_to_mesa (which isn't used by the - * i965 back-end). Therefore, even after geometry shaders are added, - * this function should only ever be called with a target of - * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB. - */ - unreachable("Unexpected target in brwProgramStringNotify"); - } - - return true; -} - -static void -brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL; - assert(devinfo->ver >= 7 && devinfo->ver <= 11); - - if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | - GL_ELEMENT_ARRAY_BARRIER_BIT | - GL_COMMAND_BARRIER_BIT)) - bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - - if (barriers & GL_UNIFORM_BARRIER_BIT) - bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE); - - if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) - bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - - if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT | - GL_PIXEL_BUFFER_BARRIER_BIT)) - bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_RENDER_TARGET_FLUSH); - - if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) - bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_RENDER_TARGET_FLUSH); - - /* Typed surface messages are handled by the render cache on IVB, so we - * need to flush it too. 
- */ - if (devinfo->verx10 == 70) - bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH; - - brw_emit_pipe_control_flush(brw, bits); -} - -static void -brw_framebuffer_fetch_barrier(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (!ctx->Extensions.EXT_shader_framebuffer_fetch) { - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } else { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH); - } - } -} - -void -brw_get_scratch_bo(struct brw_context *brw, - struct brw_bo **scratch_bo, int size) -{ - struct brw_bo *old_bo = *scratch_bo; - - if (old_bo && old_bo->size < size) { - brw_bo_unreference(old_bo); - old_bo = NULL; - } - - if (!old_bo) { - *scratch_bo = - brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH); - } -} - -/** - * Reserve enough scratch space for the given stage to hold \p per_thread_size - * bytes times the given \p thread_count. - */ -void -brw_alloc_stage_scratch(struct brw_context *brw, - struct brw_stage_state *stage_state, - unsigned per_thread_size) -{ - if (stage_state->per_thread_scratch >= per_thread_size) - return; - - stage_state->per_thread_scratch = per_thread_size; - - if (stage_state->scratch_bo) - brw_bo_unreference(stage_state->scratch_bo); - - const struct intel_device_info *devinfo = &brw->screen->devinfo; - assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids)); - unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage]; - stage_state->scratch_bo = - brw_bo_alloc(brw->bufmgr, "shader scratch space", - per_thread_size * max_ids, BRW_MEMZONE_SCRATCH); -} - -void -brw_init_frag_prog_functions(struct dd_function_table *functions) -{ - assert(functions->ProgramStringNotify == _tnl_program_string); - - functions->NewProgram = brw_new_program; - functions->DeleteProgram = brw_delete_program; - functions->ProgramStringNotify = brw_program_string_notify; - - functions->LinkShader = brw_link_shader; - - functions->MemoryBarrier = brw_memory_barrier; - functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier; -} - -struct shader_times { - uint64_t time; - uint64_t written; - uint64_t reset; -}; - -void -brw_init_shader_time(struct brw_context *brw) -{ - const int max_entries = 2048; - brw->shader_time.bo = - brw_bo_alloc(brw->bufmgr, "shader time", - max_entries * BRW_SHADER_TIME_STRIDE * 3, - BRW_MEMZONE_OTHER); - brw->shader_time.names = rzalloc_array(brw, const char *, max_entries); - brw->shader_time.ids = rzalloc_array(brw, int, max_entries); - brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, - max_entries); - brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times, - max_entries); - brw->shader_time.max_entries = max_entries; -} - -static int -compare_time(const void *a, const void *b) -{ - uint64_t * const *a_val = a; - uint64_t * const *b_val = b; - - /* We don't just subtract because we're turning the value to an int. 
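brw_alloc_stage_scratch() above grows scratch space monotonically: reallocation happens only when per-thread demand increases, and the buffer is sized as per-thread bytes times the stage's fixed maximum number of scratch IDs. A stripped-down sketch with plain malloc standing in for brw_bo_alloc() (the real code also keeps the old buffer referenced until the GPU is done with it):

#include <stdlib.h>

struct scratch {
   unsigned per_thread_size;
   void *bo;        /* stands in for the driver's buffer object */
   size_t bo_size;
};

/* Grow-only: only reallocate when a shader needs more per-thread
 * space than the current buffer provides. */
static void
ensure_scratch(struct scratch *s, unsigned per_thread_size, unsigned max_ids)
{
   if (s->per_thread_size >= per_thread_size)
      return;                  /* current buffer is already big enough */

   s->per_thread_size = per_thread_size;
   free(s->bo);
   s->bo_size = (size_t)per_thread_size * max_ids;
   s->bo = malloc(s->bo_size);
}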
*/ - if (**a_val < **b_val) - return -1; - else if (**a_val == **b_val) - return 0; - else - return 1; -} - -static void -print_shader_time_line(const char *stage, const char *name, - int shader_num, uint64_t time, uint64_t total) -{ - fprintf(stderr, "%-6s%-18s", stage, name); - - if (shader_num != 0) - fprintf(stderr, "%4d: ", shader_num); - else - fprintf(stderr, " : "); - - fprintf(stderr, "%16lld (%7.2f Gcycles) %4.1f%%\n", - (long long)time, - (double)time / 1000000000.0, - (double)time / total * 100.0); -} - -static void -brw_report_shader_time(struct brw_context *brw) -{ - if (!brw->shader_time.bo || !brw->shader_time.num_entries) - return; - - uint64_t scaled[brw->shader_time.num_entries]; - uint64_t *sorted[brw->shader_time.num_entries]; - uint64_t total_by_type[ST_CS + 1]; - memset(total_by_type, 0, sizeof(total_by_type)); - double total = 0; - for (int i = 0; i < brw->shader_time.num_entries; i++) { - uint64_t written = 0, reset = 0; - enum shader_time_shader_type type = brw->shader_time.types[i]; - - sorted[i] = &scaled[i]; - - switch (type) { - case ST_VS: - case ST_TCS: - case ST_TES: - case ST_GS: - case ST_FS8: - case ST_FS16: - case ST_FS32: - case ST_CS: - written = brw->shader_time.cumulative[i].written; - reset = brw->shader_time.cumulative[i].reset; - break; - - default: - /* I sometimes want to print things that aren't the 3 shader times. - * Just print the sum in that case. - */ - written = 1; - reset = 0; - break; - } - - uint64_t time = brw->shader_time.cumulative[i].time; - if (written) { - scaled[i] = time / written * (written + reset); - } else { - scaled[i] = time; - } - - switch (type) { - case ST_VS: - case ST_TCS: - case ST_TES: - case ST_GS: - case ST_FS8: - case ST_FS16: - case ST_FS32: - case ST_CS: - total_by_type[type] += scaled[i]; - break; - default: - break; - } - - total += scaled[i]; - } - - if (total == 0) { - fprintf(stderr, "No shader time collected yet\n"); - return; - } - - qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time); - - fprintf(stderr, "\n"); - fprintf(stderr, "type ID cycles spent %% of total\n"); - for (int s = 0; s < brw->shader_time.num_entries; s++) { - const char *stage; - /* Work back from the sorted pointers times to a time to print. 
*/ - int i = sorted[s] - scaled; - - if (scaled[i] == 0) - continue; - - int shader_num = brw->shader_time.ids[i]; - const char *shader_name = brw->shader_time.names[i]; - - switch (brw->shader_time.types[i]) { - case ST_VS: - stage = "vs"; - break; - case ST_TCS: - stage = "tcs"; - break; - case ST_TES: - stage = "tes"; - break; - case ST_GS: - stage = "gs"; - break; - case ST_FS8: - stage = "fs8"; - break; - case ST_FS16: - stage = "fs16"; - break; - case ST_FS32: - stage = "fs32"; - break; - case ST_CS: - stage = "cs"; - break; - default: - stage = "other"; - break; - } - - print_shader_time_line(stage, shader_name, shader_num, - scaled[i], total); - } - - fprintf(stderr, "\n"); - print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total); - print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total); - print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total); - print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total); - print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total); - print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total); - print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total); - print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total); -} - -static void -brw_collect_shader_time(struct brw_context *brw) -{ - if (!brw->shader_time.bo) - return; - - /* This probably stalls on the last rendering. We could fix that by - * delaying reading the reports, but it doesn't look like it's a big - * overhead compared to the cost of tracking the time in the first place. - */ - void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE); - - for (int i = 0; i < brw->shader_time.num_entries; i++) { - uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE; - - brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4]; - brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4]; - brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4]; - } - - /* Zero the BO out to clear it out for our next collection. - */ - memset(bo_map, 0, brw->shader_time.bo->size); - brw_bo_unmap(brw->shader_time.bo); -} - -void -brw_collect_and_report_shader_time(struct brw_context *brw) -{ - brw_collect_shader_time(brw); - - if (brw->shader_time.report_time == 0 || - get_time() - brw->shader_time.report_time >= 1.0) { - brw_report_shader_time(brw); - brw->shader_time.report_time = get_time(); - } -} - -/** - * Chooses an index in the shader_time buffer and sets up tracking information - * for our printouts. - * - * Note that this holds on to references to the underlying programs, which may - * change their lifetimes compared to normal operation. - */ -int -brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog, - enum shader_time_shader_type type, bool is_glsl_sh) -{ - int shader_time_index = brw->shader_time.num_entries++; - assert(shader_time_index < brw->shader_time.max_entries); - brw->shader_time.types[shader_time_index] = type; - - const char *name; - if (prog->Id == 0) { - name = "ff"; - } else if (is_glsl_sh) { - name = prog->info.label ? 
- ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl"; - } else { - name = "prog"; - } - - brw->shader_time.names[shader_time_index] = name; - brw->shader_time.ids[shader_time_index] = prog->Id; - - return shader_time_index; -} - -void -brw_destroy_shader_time(struct brw_context *brw) -{ - brw_bo_unreference(brw->shader_time.bo); - brw->shader_time.bo = NULL; -} - -void -brw_stage_prog_data_free(const void *p) -{ - struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p; - - ralloc_free(prog_data->param); - ralloc_free(prog_data->pull_param); -} - -void -brw_dump_arb_asm(const char *stage, struct gl_program *prog) -{ - fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n", - stage, prog->Id, stage); - _mesa_print_program(prog); -} - -void -brw_setup_tex_for_precompile(const struct intel_device_info *devinfo, - struct brw_sampler_prog_key_data *tex, - const struct gl_program *prog) -{ - const bool has_shader_channel_select = devinfo->verx10 >= 75; - unsigned sampler_count = util_last_bit(prog->SamplersUsed); - for (unsigned i = 0; i < sampler_count; i++) { - if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) { - /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ - tex->swizzles[i] = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE); - } else { - /* Color sampler: assume no swizzling. */ - tex->swizzles[i] = SWIZZLE_XYZW; - } - } -} - -/** - * Sets up the starting offsets for the groups of binding table entries - * common to all pipeline stages. - * - * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're - * unused but also make sure that addition of small offsets to them will - * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. - */ -uint32_t -brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - uint32_t next_binding_table_offset) -{ - int num_textures = util_last_bit(prog->SamplersUsed); - - stage_prog_data->binding_table.texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - - if (prog->info.num_ubos) { - assert(prog->info.num_ubos <= BRW_MAX_UBO); - stage_prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += prog->info.num_ubos; - } else { - stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; - } - - if (prog->info.num_ssbos || prog->info.num_abos) { - assert(prog->info.num_abos <= BRW_MAX_ABO); - assert(prog->info.num_ssbos <= BRW_MAX_SSBO); - stage_prog_data->binding_table.ssbo_start = next_binding_table_offset; - next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos; - } else { - stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0; - } - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - stage_prog_data->binding_table.shader_time_start = next_binding_table_offset; - next_binding_table_offset++; - } else { - stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0; - } - - if (prog->info.uses_texture_gather) { - if (devinfo->ver >= 8) { - stage_prog_data->binding_table.gather_texture_start = - stage_prog_data->binding_table.texture_start; - } else { - stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - } - } else { - stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; - } - - if (prog->info.num_images) { - stage_prog_data->binding_table.image_start = 
next_binding_table_offset; - next_binding_table_offset += prog->info.num_images; - } else { - stage_prog_data->binding_table.image_start = 0xd0d0d0d0; - } - - /* This may or may not be used depending on how the compile goes. */ - stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset; - next_binding_table_offset++; - - /* Plane 0 is just the regular texture section */ - stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start; - - stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset; - next_binding_table_offset += num_textures; - - stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset; - next_binding_table_offset += num_textures; - - /* Set the binding table size. Some callers may append new entries - * and increase this accordingly. - */ - stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4; - - assert(next_binding_table_offset <= BRW_MAX_SURFACES); - return next_binding_table_offset; -} - -void -brw_populate_default_key(const struct brw_compiler *compiler, - union brw_any_prog_key *prog_key, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - switch (prog->info.stage) { - case MESA_SHADER_VERTEX: - brw_vs_populate_default_key(compiler, &prog_key->vs, prog); - break; - case MESA_SHADER_TESS_CTRL: - brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog); - break; - case MESA_SHADER_TESS_EVAL: - brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog); - break; - case MESA_SHADER_GEOMETRY: - brw_gs_populate_default_key(compiler, &prog_key->gs, prog); - break; - case MESA_SHADER_FRAGMENT: - brw_wm_populate_default_key(compiler, &prog_key->wm, prog); - break; - case MESA_SHADER_COMPUTE: - brw_cs_populate_default_key(compiler, &prog_key->cs, prog); - break; - default: - unreachable("Unsupported stage!"); - } -} - -void -brw_debug_recompile(struct brw_context *brw, - gl_shader_stage stage, - unsigned api_id, - struct brw_base_prog_key *key) -{ - const struct brw_compiler *compiler = brw->screen->compiler; - enum brw_cache_id cache_id = brw_stage_cache_id(stage); - - brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n", - _mesa_shader_stage_to_string(stage), api_id); - - const void *old_key = - brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id); - - brw_debug_key_recompile(compiler, brw, stage, old_key, key); -} diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h deleted file mode 100644 index 965ec1a..0000000 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
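brw_assign_common_binding_table_offsets() above hands out consecutive index ranges and poisons the start of any unused group with 0xd0d0d0d0, so that adding a small index to an absent group's start blows past the surface limit and trips an assert. The scheme, reduced to three groups with illustrative names:

#include <stdint.h>

#define UNUSED_GROUP 0xd0d0d0d0   /* poison value for absent groups */

struct binding_table {
   uint32_t texture_start;
   uint32_t ubo_start;
   uint32_t ssbo_start;
};

/* Assign consecutive binding-table ranges; returns the total number of
 * entries used, which callers may extend further. */
static uint32_t
assign_offsets(struct binding_table *bt, unsigned num_textures,
               unsigned num_ubos, unsigned num_ssbos)
{
   uint32_t next = 0;

   bt->texture_start = next;
   next += num_textures;

   bt->ubo_start = num_ubos ? next : UNUSED_GROUP;
   next += num_ubos;

   bt->ssbo_start = num_ssbos ? next : UNUSED_GROUP;
   next += num_ssbos;

   return next;
}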
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_PROGRAM_H -#define BRW_PROGRAM_H - -#include "compiler/brw_compiler.h" -#include "nir.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_context; -struct blob; -struct blob_reader; - -enum brw_param_domain { - BRW_PARAM_DOMAIN_BUILTIN = 0, - BRW_PARAM_DOMAIN_PARAMETER, - BRW_PARAM_DOMAIN_UNIFORM, - BRW_PARAM_DOMAIN_IMAGE, -}; - -#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) -#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) -#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) - -#define BRW_PARAM_PARAMETER(idx, comp) \ - BRW_PARAM(PARAMETER, ((idx) << 2) | (comp)) -#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2) -#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3) - -#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx)) -#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param) - -#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) -#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) -#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf) - -struct nir_shader *brw_create_nir(struct brw_context *brw, - const struct gl_shader_program *shader_prog, - struct gl_program *prog, - gl_shader_stage stage, - bool is_scalar); - -void brw_nir_lower_resources(nir_shader *nir, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - const struct intel_device_info *devinfo); - -void brw_shader_gather_info(nir_shader *nir, struct gl_program *prog); - -void brw_setup_tex_for_precompile(const struct intel_device_info *devinfo, - struct brw_sampler_prog_key_data *tex, - const struct gl_program *prog); - -void brw_populate_base_prog_key(struct gl_context *ctx, - const struct brw_program *prog, - struct brw_base_prog_key *key); -void brw_populate_default_base_prog_key(const struct intel_device_info *devinfo, - const struct brw_program *prog, - struct brw_base_prog_key *key); -void brw_debug_recompile(struct brw_context *brw, gl_shader_stage stage, - unsigned api_id, struct brw_base_prog_key *key); - -uint32_t -brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - uint32_t next_binding_table_offset); - -void -brw_populate_default_key(const struct brw_compiler *compiler, - union brw_any_prog_key *prog_key, - struct gl_shader_program *sh_prog, - struct gl_program *prog); - -void -brw_stage_prog_data_free(const void *prog_data); - -void -brw_dump_arb_asm(const char *stage, struct gl_program *prog); - -bool brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog); -bool brw_tcs_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog); -bool brw_tes_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog); -bool brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog); -bool brw_fs_precompile(struct gl_context *ctx, struct 
gl_program *prog); -bool brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog); - -GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); - -void brw_upload_tcs_prog(struct brw_context *brw); -void brw_tcs_populate_key(struct brw_context *brw, - struct brw_tcs_prog_key *key); -void brw_tcs_populate_default_key(const struct brw_compiler *compiler, - struct brw_tcs_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog); -void brw_upload_tes_prog(struct brw_context *brw); -void brw_tes_populate_key(struct brw_context *brw, - struct brw_tes_prog_key *key); -void brw_tes_populate_default_key(const struct brw_compiler *compiler, - struct brw_tes_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog); - -void brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage, - const void *program, - struct brw_stage_prog_data *prog_data); -bool brw_read_blob_program_data(struct blob_reader *binary, - struct gl_program *prog, gl_shader_stage stage, - const uint8_t **program, - struct brw_stage_prog_data *prog_data); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_program_binary.c b/src/mesa/drivers/dri/i965/brw_program_binary.c deleted file mode 100644 index 919ddd3..0000000 --- a/src/mesa/drivers/dri/i965/brw_program_binary.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Copyright (c) 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include - -#include "compiler/nir/nir_serialize.h" -#include "util/build_id.h" -#include "util/mesa-sha1.h" - -#include "brw_context.h" -#include "brw_program.h" -#include "brw_state.h" - -static uint8_t driver_sha1[20]; - -void -brw_program_binary_init(unsigned device_id) -{ - const struct build_id_note *note = - build_id_find_nhdr_for_addr(brw_program_binary_init); - assert(note); - - /** - * With Mesa's megadrivers, taking the sha1 of i965_dri.so may not be - * unique. Therefore, we make a sha1 of the "i965" string and the sha1 - * build id from i965_dri.so. 
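
The device-id check and renderer-string construction just below are exact-length arithmetic: "i965_" plus four zero-padded hex digits is nine characters, so snprintf's return value must equal sizeof(renderer) - 1. A minimal standalone sketch, with a hypothetical PCI device id:

#include <assert.h>
#include <stdio.h>

int main(void)
{
   unsigned device_id = 0x0166;   /* hypothetical PCI id, for illustration */
   char renderer[10];             /* "i965_" + 4 hex digits + NUL */

   assert(device_id < 0x10000);   /* guarantees %04x emits exactly 4 digits */
   int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
   assert(len == sizeof(renderer) - 1);   /* 9 characters written */
   printf("%s\n", renderer);              /* prints "i965_0166" */
   return 0;
}
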
- */ - struct mesa_sha1 ctx; - _mesa_sha1_init(&ctx); - char renderer[10]; - assert(device_id < 0x10000); - int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id); - assert(len == sizeof(renderer) - 1); - _mesa_sha1_update(&ctx, renderer, len); - _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note)); - _mesa_sha1_final(&ctx, driver_sha1); -} - -void -brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1) -{ - memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20); -} - -enum driver_cache_blob_part { - END_PART, - INTEL_PART, - NIR_PART, -}; - -static bool -blob_parts_valid(void *blob, uint32_t size) -{ - struct blob_reader reader; - blob_reader_init(&reader, blob, size); - - do { - uint32_t part_type = blob_read_uint32(&reader); - if (reader.overrun) - return false; - if (part_type == END_PART) - return reader.current == reader.end; - switch ((enum driver_cache_blob_part)part_type) { - case INTEL_PART: - case NIR_PART: - /* Read the uint32_t part-size and skip over it */ - blob_skip_bytes(&reader, blob_read_uint32(&reader)); - if (reader.overrun) - return false; - break; - default: - return false; - } - } while (true); -} - -static bool -blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part) -{ - struct blob_reader reader; - blob_reader_init(&reader, blob, size); - - assert(blob_parts_valid(blob, size)); - do { - uint32_t part_type = blob_read_uint32(&reader); - if (part_type == END_PART) - return false; - if (part_type == part) - return true; - blob_skip_bytes(&reader, blob_read_uint32(&reader)); - } while (true); -} - -static bool -driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program) -{ - if (!blob) { - return false; - } else if (!blob_parts_valid(blob, size)) { - unreachable("Driver blob format is bad!"); - return false; - } else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) { - return true; - } else { - return false; - } -} - -static void -serialize_nir_part(struct blob *writer, struct gl_program *prog) -{ - blob_write_uint32(writer, NIR_PART); - intptr_t size_offset = blob_reserve_uint32(writer); - size_t nir_start = writer->size; - nir_serialize(writer, prog->nir, false); - blob_overwrite_uint32(writer, size_offset, writer->size - nir_start); -} - -void -brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog) -{ - if (driver_blob_is_ready(prog->driver_cache_blob, - prog->driver_cache_blob_size, false)) - return; - - if (prog->driver_cache_blob) - ralloc_free(prog->driver_cache_blob); - - struct blob writer; - blob_init(&writer); - serialize_nir_part(&writer, prog); - blob_write_uint32(&writer, END_PART); - prog->driver_cache_blob = ralloc_size(NULL, writer.size); - memcpy(prog->driver_cache_blob, writer.data, writer.size); - prog->driver_cache_blob_size = writer.size; - blob_finish(&writer); -} - -static bool -deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx, - struct gl_program *prog, gl_shader_stage stage) -{ - struct brw_context *brw = brw_context(ctx); - - union brw_any_prog_key prog_key; - blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage)); - prog_key.base.program_string_id = brw_program(prog)->id; - - enum brw_cache_id cache_id = brw_stage_cache_id(stage); - - const uint8_t *program; - struct brw_stage_prog_data *prog_data = - ralloc_size(NULL, sizeof(union brw_any_prog_data)); - - if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) { - ralloc_free(prog_data); - return false; - } - - uint32_t offset; 
- void *out_prog_data; - brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage), - program, prog_data->program_size, prog_data, - brw_prog_data_size(stage), &offset, &out_prog_data); - - ralloc_free(prog_data); - - return true; -} - -void -brw_program_deserialize_driver_blob(struct gl_context *ctx, - struct gl_program *prog, - gl_shader_stage stage) -{ - if (!prog->driver_cache_blob) - return; - - struct blob_reader reader; - blob_reader_init(&reader, prog->driver_cache_blob, - prog->driver_cache_blob_size); - - do { - uint32_t part_type = blob_read_uint32(&reader); - if ((enum driver_cache_blob_part)part_type == END_PART) - break; - switch ((enum driver_cache_blob_part)part_type) { - case INTEL_PART: { - ASSERTED uint32_t gen_size = blob_read_uint32(&reader); - assert(!reader.overrun && - (uintptr_t)(reader.end - reader.current) > gen_size); - deserialize_intel_program(&reader, ctx, prog, stage); - break; - } - case NIR_PART: { - ASSERTED uint32_t nir_size = blob_read_uint32(&reader); - assert(!reader.overrun && - (uintptr_t)(reader.end - reader.current) > nir_size); - const struct nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - prog->nir = nir_deserialize(NULL, options, &reader); - break; - } - default: - unreachable("Unsupported blob part type!"); - break; - } - } while (true); - - ralloc_free(prog->driver_cache_blob); - prog->driver_cache_blob = NULL; - prog->driver_cache_blob_size = 0; -} - -/* This is just a wrapper around brw_program_deserialize_nir() as i965 - * doesn't need gl_shader_program like other drivers do. - */ -void -brw_deserialize_program_binary(struct gl_context *ctx, - struct gl_shader_program *shProg, - struct gl_program *prog) -{ - brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage); -} - -static void -serialize_intel_part(struct blob *writer, struct gl_context *ctx, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - - union brw_any_prog_key key; - brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog); - - const gl_shader_stage stage = prog->info.stage; - uint32_t offset = 0; - void *prog_data = NULL; - if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key, - brw_prog_key_size(stage), &offset, &prog_data, - false)) { - const void *program_map = brw->cache.map + offset; - /* TODO: Improve perf for non-LLC. It would be best to save it at - * program generation time when the program is in normal memory - * accessible with cache to the CPU. Another easier change would be to - * use _mesa_streaming_load_memcpy to read from the program mapped - * memory. - */ - blob_write_uint32(writer, INTEL_PART); - intptr_t size_offset = blob_reserve_uint32(writer); - size_t gen_start = writer->size; - blob_write_bytes(writer, &key, brw_prog_key_size(stage)); - brw_write_blob_program_data(writer, stage, program_map, prog_data); - blob_overwrite_uint32(writer, size_offset, writer->size - gen_start); - } -} - -void -brw_serialize_program_binary(struct gl_context *ctx, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - if (driver_blob_is_ready(prog->driver_cache_blob, - prog->driver_cache_blob_size, true)) - return; - - if (prog->driver_cache_blob) { - if (!prog->nir) { - /* If we loaded from the disk shader cache, then the nir might not - * have been deserialized yet. 
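
For orientation, the driver blob that serialize_nir_part() and serialize_intel_part() build, and that blob_parts_valid() walks, is a simple tagged sequence. A sketch of the layout, reconstructed from the code (sizes in bytes):

/*
 * driver_cache_blob layout:
 *
 *   uint32_t part_type = NIR_PART
 *   uint32_t part_size            <- reserved first, overwritten once known
 *   uint8_t  payload[part_size]   <- serialized NIR
 *   uint32_t part_type = INTEL_PART    (present only once assembly is cached)
 *   uint32_t part_size
 *   uint8_t  payload[part_size]   <- prog key, prog_data, native code
 *   uint32_t part_type = END_PART <- must land exactly at the end of the blob
 */
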
- */ - brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage); - } - ralloc_free(prog->driver_cache_blob); - } - - struct blob writer; - blob_init(&writer); - serialize_nir_part(&writer, prog); - serialize_intel_part(&writer, ctx, sh_prog, prog); - blob_write_uint32(&writer, END_PART); - prog->driver_cache_blob = ralloc_size(NULL, writer.size); - memcpy(prog->driver_cache_blob, writer.data, writer.size); - prog->driver_cache_blob_size = writer.size; - blob_finish(&writer); -} - -void -brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage, - const void *program, - struct brw_stage_prog_data *prog_data) -{ - /* Write prog_data to blob. */ - blob_write_bytes(binary, prog_data, brw_prog_data_size(stage)); - - /* Write program to blob. */ - blob_write_bytes(binary, program, prog_data->program_size); - - /* Write push params */ - blob_write_bytes(binary, prog_data->param, - sizeof(uint32_t) * prog_data->nr_params); - - /* Write pull params */ - blob_write_bytes(binary, prog_data->pull_param, - sizeof(uint32_t) * prog_data->nr_pull_params); -} - -bool -brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog, - gl_shader_stage stage, const uint8_t **program, - struct brw_stage_prog_data *prog_data) -{ - /* Read shader prog_data from blob. */ - blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage)); - if (binary->overrun) - return false; - - /* Read shader program from blob. */ - *program = blob_read_bytes(binary, prog_data->program_size); - - /* Read push params */ - prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params); - blob_copy_bytes(binary, prog_data->param, - sizeof(uint32_t) * prog_data->nr_params); - - /* Read pull params */ - prog_data->pull_param = rzalloc_array(NULL, uint32_t, - prog_data->nr_pull_params); - blob_copy_bytes(binary, prog_data->pull_param, - sizeof(uint32_t) * prog_data->nr_pull_params); - - return !binary->overrun; -} diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c deleted file mode 100644 index df2b92e..0000000 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ /dev/null @@ -1,523 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell
-  */
-
-/** @file brw_program_cache.c
- *
- * This file implements a simple program cache for 965.  The consumers can
- * query the hash table of programs using a cache_id and program key, and
- * receive the corresponding program buffer object (plus associated auxiliary
- * data) in return.  Objects in the cache may not have relocations
- * (pointers to other BOs) in them.
- *
- * The inner workings are a simple hash table based on an XXH32 hash of the
- * key data.
- *
- * Replacement is not implemented.  Instead, when the cache gets too
- * big we throw out all of the cache data and let it get regenerated.
- */
-
-#include "main/streaming-load-memcpy.h"
-#include "x86/common_x86_asm.h"
-#include "brw_batch.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_gs.h"
-#include "brw_cs.h"
-#include "brw_program.h"
-#include "compiler/brw_eu.h"
-#include "util/u_memory.h"
-#define XXH_INLINE_ALL
-#include "util/xxhash.h"
-
-#define FILE_DEBUG_FLAG DEBUG_STATE
-
-struct brw_cache_item {
-   /**
-    * Effectively part of the key, cache_id identifies what kind of state
-    * buffer is involved, and also which dirty flag should be set.
-    */
-   enum brw_cache_id cache_id;
-
-   /** 32-bit hash of the key data */
-   GLuint hash;
-
-   /** for variable-sized keys */
-   GLuint key_size;
-   GLuint prog_data_size;
-   const struct brw_base_prog_key *key;
-
-   uint32_t offset;
-   uint32_t size;
-
-   struct brw_cache_item *next;
-};
-
-enum brw_cache_id
-brw_stage_cache_id(gl_shader_stage stage)
-{
-   static const enum brw_cache_id stage_ids[] = {
-      BRW_CACHE_VS_PROG,
-      BRW_CACHE_TCS_PROG,
-      BRW_CACHE_TES_PROG,
-      BRW_CACHE_GS_PROG,
-      BRW_CACHE_FS_PROG,
-      BRW_CACHE_CS_PROG,
-   };
-   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_ids));
-   return stage_ids[stage];
-}
-
-static GLuint
-hash_key(struct brw_cache_item *item)
-{
-   uint32_t hash = 0;
-   hash = XXH32(&item->cache_id, sizeof(item->cache_id), hash);
-   hash = XXH32(item->key, item->key_size, hash);
-
-   return hash;
-}
-
-static int
-brw_cache_item_equals(const struct brw_cache_item *a,
-                      const struct brw_cache_item *b)
-{
-   return a->cache_id == b->cache_id &&
-          a->hash == b->hash &&
-          a->key_size == b->key_size &&
-          (memcmp(a->key, b->key, a->key_size) == 0);
-}
-
-static struct brw_cache_item *
-search_cache(struct brw_cache *cache, GLuint hash,
-             struct brw_cache_item *lookup)
-{
-   struct brw_cache_item *c;
-
-#if 0
-   int bucketcount = 0;
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next)
-      bucketcount++;
-
-   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
-           cache->size, bucketcount, cache->n_items);
-#endif
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (brw_cache_item_equals(lookup, c))
-         return c;
-   }
-
-   return NULL;
-}
-
-
-static void
-rehash(struct brw_cache *cache)
-{
-   struct brw_cache_item **items;
-   struct brw_cache_item *c, *next;
-   GLuint size, i;
-
-   size = cache->size * 3;
-   items = calloc(size, sizeof(*items));
-
-   for (i = 0; i < cache->size; i++)
-      for (c = cache->items[i]; c; c = next) {
-         next = c->next;
-         c->next = items[c->hash % size];
-         items[c->hash % size] = c;
-      }
-
-   free(cache->items);
-   cache->items = items;
-   cache->size = size;
-}
-
-
-/**
- * Looks up an item matching cache_id and key.  On a hit, returns true and
- * updates the *inout_offset and *inout_prog_data out-parameters.
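
hash_key() above folds the cache_id and the variable-length key into one 32-bit bucket hash by seeding the second XXH32() call with the result of the first. A standalone sketch against the public xxHash API (Mesa inlines its own copy via util/xxhash.h; the function name here is illustrative):

#include <stddef.h>
#include <stdint.h>
#include <xxhash.h>   /* public xxHash API; link with -lxxhash */

static uint32_t
hash_id_plus_key(uint32_t cache_id, const void *key, size_t key_size)
{
   /* Hash the fixed-size id, then use the result to seed the key hash. */
   uint32_t hash = XXH32(&cache_id, sizeof(cache_id), 0);
   return XXH32(key, key_size, hash);
}
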
- */ -bool -brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - const void *key, GLuint key_size, uint32_t *inout_offset, - void *inout_prog_data, bool flag_state) -{ - struct brw_cache_item *item; - struct brw_cache_item lookup; - GLuint hash; - - lookup.cache_id = cache_id; - lookup.key = key; - lookup.key_size = key_size; - hash = hash_key(&lookup); - lookup.hash = hash; - - item = search_cache(cache, hash, &lookup); - - if (item == NULL) - return false; - - void *prog_data = ((char *) item->key) + item->key_size; - - if (item->offset != *inout_offset || - prog_data != *((void **) inout_prog_data)) { - if (likely(flag_state)) - cache->brw->ctx.NewDriverState |= (1 << cache_id); - *inout_offset = item->offset; - *((void **) inout_prog_data) = prog_data; - } - - return true; -} - -static void -brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) -{ - struct brw_context *brw = cache->brw; - struct brw_bo *new_bo; - - perf_debug("Copying to larger program cache: %u kB -> %u kB\n", - (unsigned) cache->bo->size / 1024, new_size / 1024); - - new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size, - BRW_MEMZONE_SHADER); - if (can_do_exec_capture(brw->screen)) - new_bo->kflags |= EXEC_OBJECT_CAPTURE; - - void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE | - MAP_ASYNC | MAP_PERSISTENT); - - /* Copy any existing data that needs to be saved. */ - if (cache->next_offset != 0) { -#ifdef USE_SSE41 - if (!cache->bo->cache_coherent && cpu_has_sse4_1) - _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset); - else -#endif - memcpy(map, cache->map, cache->next_offset); - } - - brw_bo_unmap(cache->bo); - brw_bo_unreference(cache->bo); - cache->bo = new_bo; - cache->map = map; - - /* Since we have a new BO in place, we need to signal the units - * that depend on it (state base address on gfx5+, or unit state before). - */ - brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE; - brw->batch.state_base_address_emitted = false; -} - -/** - * Attempts to find an item in the cache with identical data. - */ -static const struct brw_cache_item * -brw_lookup_prog(const struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, unsigned data_size) -{ - unsigned i; - const struct brw_cache_item *item; - - for (i = 0; i < cache->size; i++) { - for (item = cache->items[i]; item; item = item->next) { - if (item->cache_id != cache_id || item->size != data_size || - memcmp(cache->map + item->offset, data, item->size) != 0) - continue; - - return item; - } - } - - return NULL; -} - -static uint32_t -brw_alloc_item_data(struct brw_cache *cache, uint32_t size) -{ - uint32_t offset; - - /* Allocate space in the cache BO for our new program. 
*/
-   if (cache->next_offset + size > cache->bo->size) {
-      uint32_t new_size = cache->bo->size * 2;
-
-      while (cache->next_offset + size > new_size)
-         new_size *= 2;
-
-      brw_cache_new_bo(cache, new_size);
-   }
-
-   offset = cache->next_offset;
-
-   /* Programs are always 64-byte aligned, so set up the next one now */
-   cache->next_offset = ALIGN(offset + size, 64);
-
-   return offset;
-}
-
-const void *
-brw_find_previous_compile(struct brw_cache *cache,
-                          enum brw_cache_id cache_id,
-                          unsigned program_string_id)
-{
-   for (unsigned i = 0; i < cache->size; i++) {
-      for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
-         if (c->cache_id == cache_id &&
-             c->key->program_string_id == program_string_id) {
-            return c->key;
-         }
-      }
-   }
-
-   return NULL;
-}
-
-void
-brw_upload_cache(struct brw_cache *cache,
-                 enum brw_cache_id cache_id,
-                 const void *key,
-                 GLuint key_size,
-                 const void *data,
-                 GLuint data_size,
-                 const void *prog_data,
-                 GLuint prog_data_size,
-                 uint32_t *out_offset,
-                 void *out_prog_data)
-{
-   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
-   const struct brw_cache_item *matching_data =
-      brw_lookup_prog(cache, cache_id, data, data_size);
-   GLuint hash;
-   void *tmp;
-
-   item->cache_id = cache_id;
-   item->size = data_size;
-   item->key = key;
-   item->key_size = key_size;
-   item->prog_data_size = prog_data_size;
-   hash = hash_key(item);
-   item->hash = hash;
-
-   /* If we can find a matching prog in the cache already, then reuse the
-    * existing data without creating a new copy in the underlying buffer
-    * object.  This is notably useful for programs generating shaders at
-    * runtime, where multiple shaders may compile to the same thing in our
-    * backend.
-    */
-   if (matching_data) {
-      item->offset = matching_data->offset;
-   } else {
-      item->offset = brw_alloc_item_data(cache, data_size);
-
-      /* Copy data to the buffer */
-      memcpy(cache->map + item->offset, data, data_size);
-   }
-
-   /* Set up the memory containing the key and prog_data */
-   tmp = malloc(key_size + prog_data_size);
-
-   memcpy(tmp, key, key_size);
-   memcpy(tmp + key_size, prog_data, prog_data_size);
-
-   item->key = tmp;
-
-   if (cache->n_items > cache->size * 1.5f)
-      rehash(cache);
-
-   hash %= cache->size;
-   item->next = cache->items[hash];
-   cache->items[hash] = item;
-   cache->n_items++;
-
-   *out_offset = item->offset;
-   *(void **)out_prog_data = (void *)((char *)item->key + item->key_size);
-   cache->brw->ctx.NewDriverState |= 1 << cache_id;
-}
-
-void
-brw_init_caches(struct brw_context *brw)
-{
-   struct brw_cache *cache = &brw->cache;
-
-   cache->brw = brw;
-
-   cache->size = 7;
-   cache->n_items = 0;
-   cache->items =
-      calloc(cache->size, sizeof(struct brw_cache_item *));
-
-   cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 16384,
-                            BRW_MEMZONE_SHADER);
-   if (can_do_exec_capture(brw->screen))
-      cache->bo->kflags |= EXEC_OBJECT_CAPTURE;
-
-   cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE |
-                           MAP_ASYNC | MAP_PERSISTENT);
-}
-
-static void
-brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
-{
-   struct brw_cache_item *c, *next;
-   GLuint i;
-
-   DBG("%s\n", __func__);
-
-   for (i = 0; i < cache->size; i++) {
-      for (c = cache->items[i]; c; c = next) {
-         next = c->next;
-         if (c->cache_id == BRW_CACHE_VS_PROG ||
-             c->cache_id == BRW_CACHE_TCS_PROG ||
-             c->cache_id == BRW_CACHE_TES_PROG ||
-             c->cache_id == BRW_CACHE_GS_PROG ||
-             c->cache_id == BRW_CACHE_FS_PROG ||
-             c->cache_id == BRW_CACHE_CS_PROG) {
-            const void *item_prog_data = ((char *)c->key) + c->key_size;
-
brw_stage_prog_data_free(item_prog_data); - } - free((void *)c->key); - free(c); - } - cache->items[i] = NULL; - } - - cache->n_items = 0; - - /* Start putting programs into the start of the BO again, since - * we'll never find the old results. - */ - cache->next_offset = 0; - - /* We need to make sure that the programs get regenerated, since - * any offsets leftover in brw_context will no longer be valid. - */ - brw->NewGLState = ~0; - brw->ctx.NewDriverState = ~0ull; - brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0; - brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull; - brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0; - brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull; - - /* Also, NULL out any stale program pointers. */ - brw->vs.base.prog_data = NULL; - brw->tcs.base.prog_data = NULL; - brw->tes.base.prog_data = NULL; - brw->gs.base.prog_data = NULL; - brw->wm.base.prog_data = NULL; - brw->cs.base.prog_data = NULL; - - brw_batch_flush(brw); -} - -void -brw_program_cache_check_size(struct brw_context *brw) -{ - /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of - * state cache. - */ - if (brw->cache.n_items > 2000) { - perf_debug("Exceeded state cache size limit. Clearing the set " - "of compiled programs, which will trigger recompiles\n"); - brw_clear_cache(brw, &brw->cache); - brw_cache_new_bo(&brw->cache, brw->cache.bo->size); - } -} - - -static void -brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) -{ - - DBG("%s\n", __func__); - - /* This can be NULL if context creation failed early on */ - if (cache->bo) { - brw_bo_unmap(cache->bo); - brw_bo_unreference(cache->bo); - cache->bo = NULL; - cache->map = NULL; - } - brw_clear_cache(brw, cache); - free(cache->items); - cache->items = NULL; - cache->size = 0; -} - - -void -brw_destroy_caches(struct brw_context *brw) -{ - brw_destroy_cache(brw, &brw->cache); -} - -static const char * -cache_name(enum brw_cache_id cache_id) -{ - switch (cache_id) { - case BRW_CACHE_VS_PROG: - return "VS kernel"; - case BRW_CACHE_TCS_PROG: - return "TCS kernel"; - case BRW_CACHE_TES_PROG: - return "TES kernel"; - case BRW_CACHE_FF_GS_PROG: - return "Fixed-function GS kernel"; - case BRW_CACHE_GS_PROG: - return "GS kernel"; - case BRW_CACHE_CLIP_PROG: - return "CLIP kernel"; - case BRW_CACHE_SF_PROG: - return "SF kernel"; - case BRW_CACHE_FS_PROG: - return "FS kernel"; - case BRW_CACHE_CS_PROG: - return "CS kernel"; - default: - return "unknown"; - } -} - -void -brw_print_program_cache(struct brw_context *brw) -{ - const struct brw_cache *cache = &brw->cache; - struct brw_cache_item *item; - - for (unsigned i = 0; i < cache->size; i++) { - for (item = cache->items[i]; item; item = item->next) { - fprintf(stderr, "%s:\n", cache_name(i)); - brw_disassemble_with_labels(&brw->screen->devinfo, cache->map, - item->offset, item->size, stderr); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c deleted file mode 100644 index efaa8fb..0000000 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ /dev/null @@ -1,621 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - 
* Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt
- *
- */
-
-/** @file brw_queryobj.c
- *
- * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
- * GL_EXT_transform_feedback, and friends).
- *
- * The hardware provides a PIPE_CONTROL command that can report the number of
- * fragments that passed the depth test, or the hardware timer.  They are
- * appropriately synced with the stage of the pipeline for our extensions'
- * needs.
- */
-#include "main/queryobj.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/* As best we know currently, the Gen HW timestamps are 36 bits across
- * all platforms, which we need to account for when calculating a
- * delta to measure elapsed time.
- *
- * The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes
- * only have 32 bits due to a kernel bug, and so in that case we make sure to
- * treat all raw timestamps as 32 bits so they overflow consistently and
- * remain comparable.  (Note: the timestamps being passed here are not from
- * the kernel, so we don't need to take the upper 32 bits; in this
- * buggy-kernel case we are just clipping to 32 bits for consistency.)
- */
-uint64_t
-brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, uint64_t time1)
-{
-   if (brw->screen->hw_has_timestamp == 2) {
-      /* Kernel clips timestamps to 32 bits in this case, so we also clip
-       * PIPE_CONTROL timestamps for consistency.
-       */
-      return (uint32_t)time1 - (uint32_t)time0;
-   } else {
-      if (time0 > time1) {
-         return (1ULL << 36) + time1 - time0;
-      } else {
-         return time1 - time0;
-      }
-   }
-}
-
-/**
- * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
- */
-void
-brw_write_timestamp(struct brw_context *brw, struct brw_bo *query_bo, int idx)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   if (devinfo->ver == 6) {
-      /* Emit Sandybridge workaround flush: */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_CS_STALL |
-                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
-   }
-
-   uint32_t flags = PIPE_CONTROL_WRITE_TIMESTAMP;
-
-   if (devinfo->ver == 9 && devinfo->gt == 4)
-      flags |= PIPE_CONTROL_CS_STALL;
-
-   brw_emit_pipe_control_write(brw, flags,
-                               query_bo, idx * sizeof(uint64_t), 0);
-}
-
-/**
- * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
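
To make the wraparound branch of brw_raw_timestamp_delta() above concrete, here is the arithmetic for a delta that crosses the 36-bit boundary (a standalone sketch, not driver code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* A raw 36-bit timestamp taken just before the counter wraps... */
   uint64_t time0 = (1ULL << 36) - 100;
   /* ...and one taken just after it wrapped back to zero. */
   uint64_t time1 = 50;

   /* time0 > time1, so add back one full 36-bit period, as the
    * non-buggy-kernel path does. */
   uint64_t delta = (1ULL << 36) + time1 - time0;
   assert(delta == 150);
   return 0;
}
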
- */ -void -brw_write_depth_count(struct brw_context *brw, struct brw_bo *query_bo, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t flags = PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_DEPTH_STALL; - - if (devinfo->ver == 9 && devinfo->gt == 4) - flags |= PIPE_CONTROL_CS_STALL; - - if (devinfo->ver >= 10) { - /* "Driver must program PIPE_CONTROL with only Depth Stall Enable bit set - * prior to programming a PIPE_CONTROL with Write PS Depth Count Post sync - * operation." - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); - } - - brw_emit_pipe_control_write(brw, flags, - query_bo, idx * sizeof(uint64_t), 0); -} - -/** - * Wait on the query object's BO and calculate the final result. - */ -static void -brw_queryobj_get_results(struct gl_context *ctx, - struct brw_query_object *query) -{ - struct brw_context *brw = brw_context(ctx); - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - int i; - uint64_t *results; - - assert(devinfo->ver < 6); - - if (query->bo == NULL) - return; - - /* If the application has requested the query result, but this batch is - * still contributing to it, flush it now so the results will be present - * when mapped. - */ - if (brw_batch_references(&brw->batch, query->bo)) - brw_batch_flush(brw); - - if (unlikely(brw->perf_debug)) { - if (brw_bo_busy(query->bo)) { - perf_debug("Stalling on the GPU waiting for a query object.\n"); - } - } - - results = brw_bo_map(brw, query->bo, MAP_READ); - switch (query->Base.Target) { - case GL_TIME_ELAPSED_EXT: - /* The query BO contains the starting and ending timestamps. - * Subtract the two and convert to nanoseconds. - */ - query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]); - query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result); - break; - - case GL_TIMESTAMP: - /* The query BO contains a single timestamp value in results[0]. */ - query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]); - - /* Ensure the scaled timestamp overflows according to - * GL_QUERY_COUNTER_BITS - */ - query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; - break; - - case GL_SAMPLES_PASSED_ARB: - /* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT - * value at the start and end of the batchbuffer. Subtract them to - * get the number of fragments which passed the depth test in each - * individual batch, and add those differences up to get the number - * of fragments for the entire query. - * - * Note that query->Base.Result may already be non-zero. We may have - * run out of space in the query's BO and allocated a new one. If so, - * this function was already called to accumulate the results so far. - */ - for (i = 0; i < query->last_index; i++) { - query->Base.Result += results[i * 2 + 1] - results[i * 2]; - } - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - /* If the starting and ending PS_DEPTH_COUNT from any of the batches - * differ, then some fragments passed the depth test. - */ - for (i = 0; i < query->last_index; i++) { - if (results[i * 2 + 1] != results[i * 2]) { - query->Base.Result = GL_TRUE; - break; - } - } - break; - - default: - unreachable("Unrecognized query target in brw_queryobj_get_results()"); - } - brw_bo_unmap(query->bo); - - /* Now that we've processed the data stored in the query's buffer object, - * we can release it. 
- */
-   brw_bo_unreference(query->bo);
-   query->bo = NULL;
-}
-
-/**
- * The NewQueryObject() driver hook.
- *
- * Allocates and initializes a new query object.
- */
-static struct gl_query_object *
-brw_new_query_object(struct gl_context *ctx, GLuint id)
-{
-   struct brw_query_object *query;
-
-   query = calloc(1, sizeof(struct brw_query_object));
-
-   query->Base.Id = id;
-   query->Base.Result = 0;
-   query->Base.Active = false;
-   query->Base.Ready = true;
-
-   return &query->Base;
-}
-
-/**
- * The DeleteQuery() driver hook.
- */
-static void
-brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   brw_bo_unreference(query->bo);
-   _mesa_delete_query(ctx, q);
-}
-
-/**
- * Gfx4-5 driver hook for glBeginQuery().
- *
- * Initializes driver structures and emits any GPU commands required to begin
- * recording data for the query.
- */
-static void
-brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   switch (query->Base.Target) {
-   case GL_TIME_ELAPSED_EXT:
-      /* For timestamp queries, we record the starting time right away so that
-       * we measure the full time between BeginQuery and EndQuery.  There's
-       * some debate about whether this is the right thing to do.  Our decision
-       * is based on the following text from the ARB_timer_query extension:
-       *
-       * "(5) Should the extension measure total time elapsed between the full
-       *      completion of the BeginQuery and EndQuery commands, or just time
-       *      spent in the graphics library?
-       *
-       *  RESOLVED:  This extension will measure the total time elapsed
-       *  between the full completion of these commands.  Future extensions
-       *  may implement a query to determine time elapsed at different stages
-       *  of the graphics pipeline."
-       *
-       * We write a starting timestamp now (at index 0).  At EndQuery() time,
-       * we'll write a second timestamp (at index 1), and subtract the two to
-       * obtain the time elapsed.  Notably, this includes time elapsed while
-       * the system was doing other work, such as running other applications.
-       */
-      brw_bo_unreference(query->bo);
-      query->bo =
-         brw_bo_alloc(brw->bufmgr, "timer query", 4096, BRW_MEMZONE_OTHER);
-      brw_write_timestamp(brw, query->bo, 0);
-      break;
-
-   case GL_ANY_SAMPLES_PASSED:
-   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-   case GL_SAMPLES_PASSED_ARB:
-      /* For occlusion queries, we delay taking an initial sample until the
-       * first drawing occurs in this batch.  See the reasoning in the comments
-       * for brw_emit_query_begin() below.
-       *
-       * Since we're starting a new query, we need to be sure to throw away
-       * any previous occlusion query results.
-       */
-      brw_bo_unreference(query->bo);
-      query->bo = NULL;
-      query->last_index = -1;
-
-      brw->query.obj = query;
-
-      /* Depth statistics on Gfx4 require strange workarounds, so we try to
-       * avoid them when possible.  They're required for occlusion queries,
-       * so turn them on now.
-       */
-      brw->stats_wm++;
-      brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
-      break;
-
-   default:
-      unreachable("Unrecognized query target in brw_begin_query()");
-   }
-}
-
-/**
- * Gfx4-5 driver hook for glEndQuery().
- *
- * Emits GPU commands to record a final query value, ending any data capturing.
- * However, the final result isn't necessarily available until the GPU processes
- * those commands.  brw_queryobj_get_results() processes the captured data to
- * produce the final result.
- */
-static void
-brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   switch (query->Base.Target) {
-   case GL_TIME_ELAPSED_EXT:
-      /* Write the final timestamp. */
-      brw_write_timestamp(brw, query->bo, 1);
-      break;
-
-   case GL_ANY_SAMPLES_PASSED:
-   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-   case GL_SAMPLES_PASSED_ARB:
-
-      /* No query->bo means that EndQuery was called after BeginQuery with no
-       * intervening drawing.  Rather than doing nothing at all here in this
-       * case, we emit the query_begin and query_end state to the
-       * hardware.  This is to guarantee that waiting on the result of this
-       * empty state will cause all previous queries to complete at all, as
-       * required by the OpenGL 4.3 (Core Profile) spec, section 4.2.1:
-       *
-       *    "It must always be true that if any query object returns
-       *     a result available of TRUE, all queries of the same type
-       *     issued prior to that query must also return TRUE."
-       */
-      if (!query->bo) {
-         brw_emit_query_begin(brw);
-      }
-
-      assert(query->bo);
-
-      brw_emit_query_end(brw);
-
-      brw->query.obj = NULL;
-
-      brw->stats_wm--;
-      brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
-      break;
-
-   default:
-      unreachable("Unrecognized query target in brw_end_query()");
-   }
-}
-
-/**
- * The Gfx4-5 WaitQuery() driver hook.
- *
- * Wait for a query result to become available and return it.  This is the
- * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
- */
-static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo =
-      &brw_context(ctx)->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   brw_queryobj_get_results(ctx, query);
-   query->Base.Ready = true;
-}
-
-/**
- * The Gfx4-5 CheckQuery() driver hook.
- *
- * Checks whether a query result is ready yet.  If not, flushes.
- * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
- */
-static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   /* From the GL_ARB_occlusion_query spec:
-    *
-    *    "Instead of allowing for an infinite loop, performing a
-    *     QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
-    *     not ready yet on the first time it is queried.  This ensures that
-    *     the async query will return true in finite time."
-    */
-   if (query->bo && brw_batch_references(&brw->batch, query->bo))
-      brw_batch_flush(brw);
-
-   if (query->bo == NULL || !brw_bo_busy(query->bo)) {
-      brw_queryobj_get_results(ctx, query);
-      query->Base.Ready = true;
-   }
-}
-
-/**
- * Ensure the query's BO has enough space to store a new pair of values.
- *
- * If not, gather the existing BO's results and create a new buffer of the
- * same size.
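
Spelling out the numbers behind the capacity check in ensure_bo_has_space() below, assuming the 4096-byte query BO allocated in brw_begin_query():

/* 4096 bytes / sizeof(uint64_t) = 512 slots of 64-bit counters.
 * Each query index consumes a (begin, end) pair in slots 2*i and
 * 2*i + 1, so one BO holds 256 pairs.  Once last_index * 2 + 1 is
 * >= 512, the next pair would overflow the BO, so the results so
 * far are accumulated and a fresh BO is allocated. */
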
- */ -static void -ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query) -{ - struct brw_context *brw = brw_context(ctx); - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver < 6); - - if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) { - - if (query->bo != NULL) { - /* The old query BO did not have enough space, so we allocated a new - * one. Gather the results so far (adding up the differences) and - * release the old BO. - */ - brw_queryobj_get_results(ctx, query); - } - - query->bo = brw_bo_alloc(brw->bufmgr, "query", 4096, BRW_MEMZONE_OTHER); - query->last_index = 0; - } -} - -/** - * Record the PS_DEPTH_COUNT value (for occlusion queries) just before - * primitive drawing. - * - * In a pre-hardware context world, the single PS_DEPTH_COUNT register is - * shared among all applications using the GPU. However, our query value - * needs to only include fragments generated by our application/GL context. - * - * To accommodate this, we record PS_DEPTH_COUNT at the start and end of - * each batchbuffer (technically, the first primitive drawn and flush time). - * Subtracting each pair of values calculates the change in PS_DEPTH_COUNT - * caused by a batchbuffer. Since there is no preemption inside batches, - * this is guaranteed to only measure the effects of our current application. - * - * Adding each of these differences (in case drawing is done over many batches) - * produces the final expected value. - * - * In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored - * as part of the context state, so this is unnecessary, and skipped. - */ -void -brw_emit_query_begin(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_query_object *query = brw->query.obj; - - /* Skip if we're not doing any queries, or we've already recorded the - * initial query value for this batchbuffer. - */ - if (!query || brw->query.begin_emitted) - return; - - ensure_bo_has_space(ctx, query); - - brw_write_depth_count(brw, query->bo, query->last_index * 2); - - brw->query.begin_emitted = true; -} - -/** - * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT - * (for non-hardware context platforms). - * - * See the explanation in brw_emit_query_begin(). - */ -void -brw_emit_query_end(struct brw_context *brw) -{ - struct brw_query_object *query = brw->query.obj; - - if (!brw->query.begin_emitted) - return; - - brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1); - - brw->query.begin_emitted = false; - query->last_index++; -} - -/** - * Driver hook for glQueryCounter(). - * - * This handles GL_TIMESTAMP queries, which perform a pipelined read of the - * current GPU time. This is unlike GL_TIME_ELAPSED, which measures the - * time while the query is active. - */ -void -brw_query_counter(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *) q; - - assert(q->Target == GL_TIMESTAMP); - - brw_bo_unreference(query->bo); - query->bo = - brw_bo_alloc(brw->bufmgr, "timestamp query", 4096, BRW_MEMZONE_OTHER); - brw_write_timestamp(brw, query->bo, 0); - - query->flushed = false; -} - -/** - * Read the TIMESTAMP register immediately (in a non-pipelined fashion). - * - * This is used to implement the GetTimestamp() driver hook. 
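
The mask applied in brw_get_timestamp() below (and in brw_queryobj_get_results() above) forces the scaled timestamp to wrap at GL_QUERY_COUNTER_BITS. A small standalone sketch, with 36 assumed as the reported counter width:

#include <assert.h>
#include <stdint.h>

/* Truncate a nanosecond timestamp to a given counter width, mirroring
 * "result &= (1ull << bits) - 1". */
static uint64_t
truncate_timestamp(uint64_t ns, unsigned bits)
{
   return ns & ((1ull << bits) - 1);
}

int main(void)
{
   assert(truncate_timestamp((1ull << 36) + 25, 36) == 25);   /* wraps */
   assert(truncate_timestamp(1000, 36) == 1000);              /* unchanged */
   return 0;
}
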
- */ -static uint64_t -brw_get_timestamp(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint64_t result = 0; - - switch (brw->screen->hw_has_timestamp) { - case 3: /* New kernel, always full 36bit accuracy */ - brw_reg_read(brw->bufmgr, TIMESTAMP | 1, &result); - break; - case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */ - brw_reg_read(brw->bufmgr, TIMESTAMP, &result); - result = result >> 32; - break; - case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */ - brw_reg_read(brw->bufmgr, TIMESTAMP, &result); - break; - } - - /* Scale to nanosecond units */ - result = intel_device_info_timebase_scale(devinfo, result); - - /* Ensure the scaled timestamp overflows according to - * GL_QUERY_COUNTER_BITS. Technically this isn't required if - * querying GL_TIMESTAMP via glGetInteger but it seems best to keep - * QueryObject and GetInteger timestamps consistent. - */ - result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; - return result; -} - -/** - * Is this type of query written by PIPE_CONTROL? - */ -bool -brw_is_query_pipelined(struct brw_query_object *query) -{ - switch (query->Base.Target) { - case GL_TIMESTAMP: - case GL_TIME_ELAPSED: - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - case GL_SAMPLES_PASSED_ARB: - return true; - - case GL_PRIMITIVES_GENERATED: - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - return false; - - default: - unreachable("Unrecognized query target in is_query_pipelined()"); - } -} - -/* Initialize query object functions used on all generations. */ -void brw_init_common_queryobj_functions(struct dd_function_table *functions) -{ - functions->NewQueryObject = brw_new_query_object; - functions->DeleteQuery = brw_delete_query; - functions->GetTimestamp = brw_get_timestamp; -} - -/* Initialize Gfx4/5-specific query object functions. 
*/ -void gfx4_init_queryobj_functions(struct dd_function_table *functions) -{ - functions->BeginQuery = brw_begin_query; - functions->EndQuery = brw_end_query; - functions->CheckQuery = brw_check_query; - functions->WaitQuery = brw_wait_query; - functions->QueryCounter = brw_query_counter; -} diff --git a/src/mesa/drivers/dri/i965/brw_reset.c b/src/mesa/drivers/dri/i965/brw_reset.c deleted file mode 100644 index 9051878..0000000 --- a/src/mesa/drivers/dri/i965/brw_reset.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/context.h" - -#include -#include "brw_context.h" - -/** - * Query information about GPU resets observed by this context - * - * Called via \c dd_function_table::GetGraphicsResetStatus. - */ -GLenum -brw_get_graphics_reset_status(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx }; - - /* If hardware contexts are not being used (or - * DRM_IOCTL_I915_GET_RESET_STATS is not supported), this function should - * not be accessible. - */ - assert(brw->hw_ctx != 0); - - /* A reset status other than NO_ERROR was returned last time. I915 returns - * nonzero active/pending only if reset has been encountered and completed. - * Return NO_ERROR from now on. - */ - if (brw->reset_count != 0) - return GL_NO_ERROR; - - if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0) - return GL_NO_ERROR; - - /* A reset was observed while a batch from this context was executing. - * Assume that this context was at fault. - */ - if (stats.batch_active != 0) { - brw->reset_count = stats.reset_count; - return GL_GUILTY_CONTEXT_RESET_ARB; - } - - /* A reset was observed while a batch from this context was in progress, - * but the batch was not executing. In this case, assume that the context - * was not at fault. 
- */ - if (stats.batch_pending != 0) { - brw->reset_count = stats.reset_count; - return GL_INNOCENT_CONTEXT_RESET_ARB; - } - - return GL_NO_ERROR; -} - -void -brw_check_for_reset(struct brw_context *brw) -{ - struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx }; - - if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0) - return; - - if (stats.batch_active > 0 || stats.batch_pending > 0) - _mesa_set_context_lost_dispatch(&brw->ctx); -} diff --git a/src/mesa/drivers/dri/i965/brw_screen.c b/src/mesa/drivers/dri/i965/brw_screen.c deleted file mode 100644 index 4d02e73..0000000 --- a/src/mesa/drivers/dri/i965/brw_screen.c +++ /dev/null @@ -1,2886 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "drm-uapi/drm_fourcc.h" -#include -#include -#include -#include "main/context.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/texobj.h" -#include "main/hash.h" -#include "main/fbobject.h" -#include "main/version.h" -#include "main/glthread.h" -#include "swrast/s_renderbuffer.h" -#include "util/ralloc.h" -#include "util/disk_cache.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "compiler/nir/nir.h" - -#include "utils.h" -#include "util/disk_cache.h" -#include "util/driconf.h" -#include "util/u_cpu_detect.h" -#include "util/u_memory.h" - -#include "common/intel_defines.h" - -static const driOptionDescription brw_driconf[] = { - DRI_CONF_SECTION_PERFORMANCE - /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, - * DRI_CONF_BO_REUSE_ALL - */ - DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, - "Buffer object reuse", - DRI_CONF_ENUM(0, "Disable buffer object reuse") - DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")) - DRI_CONF_MESA_NO_ERROR(false) - DRI_CONF_MESA_GLTHREAD(false) - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_QUALITY - DRI_CONF_PRECISE_TRIG(false) - - DRI_CONF_OPT_I(clamp_max_samples, -1, 0, 0, - "Clamp the value of GL_MAX_SAMPLES to the " - "given integer. 
If negative, then do not clamp.") - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_DEBUG - DRI_CONF_ALWAYS_FLUSH_BATCH(false) - DRI_CONF_ALWAYS_FLUSH_CACHE(false) - DRI_CONF_DISABLE_THROTTLING(false) - DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(false) - DRI_CONF_FORCE_GLSL_VERSION(0) - DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(false) - DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED(false) - DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false) - DRI_CONF_ALLOW_EXTRA_PP_TOKENS(false) - DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER(false) - DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION(false) - DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH(false) - DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION(false) - DRI_CONF_FORCE_COMPAT_PROFILE(false) - DRI_CONF_FORCE_GLSL_ABS_SQRT(false) - DRI_CONF_FORCE_GL_VENDOR() - - DRI_CONF_OPT_B(shader_precompile, true, "Perform code generation at shader link time.") - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_MISCELLANEOUS - DRI_CONF_GLSL_ZERO_INIT(false) - DRI_CONF_VS_POSITION_ALWAYS_INVARIANT(false) - DRI_CONF_VS_POSITION_ALWAYS_PRECISE(false) - DRI_CONF_ALLOW_RGB10_CONFIGS(false) - DRI_CONF_ALLOW_RGB565_CONFIGS(true) - DRI_CONF_SECTION_END -}; - -static char * -brw_driconf_get_xml(UNUSED const char *driver_name) -{ - return driGetOptionsXml(brw_driconf, ARRAY_SIZE(brw_driconf)); -} - -static const __DRIconfigOptionsExtension brw_config_options = { - .base = { __DRI_CONFIG_OPTIONS, 2 }, - .xml = NULL, - .getXml = brw_driconf_get_xml, -}; - -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_bufmgr.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_screen.h" -#include "brw_tex.h" -#include "brw_image.h" - -#include "brw_context.h" - -#include "drm-uapi/i915_drm.h" - -/** - * For debugging purposes, this returns a time in seconds. - */ -double -get_time(void) -{ - struct timespec tp; - - clock_gettime(CLOCK_MONOTONIC, &tp); - - return tp.tv_sec + tp.tv_nsec / 1000000000.0; -} - -static const __DRItexBufferExtension brwTexBufferExtension = { - .base = { __DRI_TEX_BUFFER, 3 }, - - .setTexBuffer = brw_set_texbuffer, - .setTexBuffer2 = brw_set_texbuffer2, - .releaseTexBuffer = brw_release_texbuffer, -}; - -static void -brw_dri2_flush_with_flags(__DRIcontext *cPriv, - __DRIdrawable *dPriv, - unsigned flags, - enum __DRI2throttleReason reason) -{ - struct brw_context *brw = cPriv->driverPrivate; - - if (!brw) - return; - - struct gl_context *ctx = &brw->ctx; - - _mesa_glthread_finish(ctx); - - FLUSH_VERTICES(ctx, 0, 0); - - if (flags & __DRI2_FLUSH_DRAWABLE) - brw_resolve_for_dri2_flush(brw, dPriv); - - if (reason == __DRI2_THROTTLE_SWAPBUFFER) - brw->need_swap_throttle = true; - if (reason == __DRI2_THROTTLE_FLUSHFRONT) - brw->need_flush_throttle = true; - - brw_batch_flush(brw); -} - -/** - * Provides compatibility with loaders that only support the older (version - * 1-3) flush interface. - * - * That includes libGL up to Mesa 9.0, and the X Server at least up to 1.13. 
- */ -static void -brw_dri2_flush(__DRIdrawable *drawable) -{ - brw_dri2_flush_with_flags(drawable->driContextPriv, drawable, - __DRI2_FLUSH_DRAWABLE, - __DRI2_THROTTLE_SWAPBUFFER); -} - -static const struct __DRI2flushExtensionRec brwFlushExtension = { - .base = { __DRI2_FLUSH, 4 }, - - .flush = brw_dri2_flush, - .invalidate = dri2InvalidateDrawable, - .flush_with_flags = brw_dri2_flush_with_flags, -}; - -static const struct brw_image_format brw_image_formats[] = { - { DRM_FORMAT_ABGR16161616F, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR16161616F, 8 } } }, - - { DRM_FORMAT_XBGR16161616F, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR16161616F, 8 } } }, - - { DRM_FORMAT_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB2101010, 4 } } }, - - { DRM_FORMAT_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } }, - - { DRM_FORMAT_ABGR2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR2101010, 4 } } }, - - { DRM_FORMAT_XBGR2101010, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR2101010, 4 } } }, - - { DRM_FORMAT_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }, - - { DRM_FORMAT_ABGR8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } }, - - { __DRI_IMAGE_FOURCC_SARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } }, - - { __DRI_IMAGE_FOURCC_SXRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_SXRGB8, 4 } } }, - - { DRM_FORMAT_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } }, - - { DRM_FORMAT_XBGR8888, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 }, } }, - - { DRM_FORMAT_ARGB1555, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB1555, 2 } } }, - - { DRM_FORMAT_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } }, - - { DRM_FORMAT_R8, __DRI_IMAGE_COMPONENTS_R, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } }, - - { DRM_FORMAT_R16, __DRI_IMAGE_COMPONENTS_R, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 1 }, } }, - - { DRM_FORMAT_GR88, __DRI_IMAGE_COMPONENTS_RG, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } }, - - { DRM_FORMAT_GR1616, __DRI_IMAGE_COMPONENTS_RG, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR1616, 2 }, } }, - - { DRM_FORMAT_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YVU410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 2, 2, 
__DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
-
-   { DRM_FORMAT_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
-   { DRM_FORMAT_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
-   { DRM_FORMAT_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
-   { DRM_FORMAT_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
-
-   { DRM_FORMAT_AYUV, __DRI_IMAGE_COMPONENTS_AYUV, 1,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } },
-
-   { DRM_FORMAT_XYUV8888, __DRI_IMAGE_COMPONENTS_XYUV, 1,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 } } },
-
-   /* For YUYV and UYVY buffers, we set up two overlapping DRI images
-    * and treat them as planar buffers in the compositors.
-    * Plane 0 is GR88 and samples YU or YV pairs and places Y into
-    * the R component, while plane 1 is ARGB/ABGR and samples YUYV/UYVY
-    * clusters, placing U into the G component and V into A. This lets
-    * the texture sampler interpolate the Y components correctly when
-    * sampling from plane 0, and interpolate U and V correctly when
-    * sampling from plane 1.
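-    * As a concrete example, a single 4-byte YUYV cluster (Y0 U Y1 V)
-    * reads through plane 0 as two GR88 texels, (R=Y0, G=U) and
-    * (R=Y1, G=V), and through plane 1 as one ARGB8888 texel with
-    * B=Y0, G=U, R=Y1, A=V.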
*/ - { DRM_FORMAT_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, - { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }, - { DRM_FORMAT_UYVY, __DRI_IMAGE_COMPONENTS_Y_UXVX, 2, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, - { 0, 1, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } } -}; - -static const struct { - uint64_t modifier; - unsigned since_ver; -} supported_modifiers[] = { - { .modifier = DRM_FORMAT_MOD_LINEAR , .since_ver = 1 }, - { .modifier = I915_FORMAT_MOD_X_TILED , .since_ver = 1 }, - { .modifier = I915_FORMAT_MOD_Y_TILED , .since_ver = 6 }, - { .modifier = I915_FORMAT_MOD_Y_TILED_CCS , .since_ver = 9 }, -}; - -static bool -modifier_is_supported(const struct intel_device_info *devinfo, - const struct brw_image_format *fmt, int dri_format, - unsigned use, uint64_t modifier) -{ - const struct isl_drm_modifier_info *modinfo = - isl_drm_modifier_get_info(modifier); - int i; - - /* ISL had better know about the modifier */ - if (!modinfo) - return false; - - if (devinfo->ver < 9 && (use & __DRI_IMAGE_USE_SCANOUT) && - !(modinfo->tiling == ISL_TILING_LINEAR || - modinfo->tiling == ISL_TILING_X)) - return false; - - if (modinfo->aux_usage == ISL_AUX_USAGE_CCS_E) { - /* If INTEL_DEBUG=norbc is set, don't support any CCS_E modifiers */ - if (INTEL_DEBUG(DEBUG_NO_RBC)) - return false; - - /* CCS_E is not supported for planar images */ - if (fmt && fmt->nplanes > 1) - return false; - - if (fmt) { - assert(dri_format == 0); - dri_format = fmt->planes[0].dri_format; - } - - mesa_format format = driImageFormatToGLFormat(dri_format); - /* Whether or not we support compression is based on the RGBA non-sRGB - * version of the format. - */ - format = _mesa_format_fallback_rgbx_to_rgba(format); - format = _mesa_get_srgb_format_linear(format); - if (!isl_format_supports_ccs_e(devinfo, - brw_isl_format_for_mesa_format(format))) - return false; - } - - for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) { - if (supported_modifiers[i].modifier != modifier) - continue; - - return supported_modifiers[i].since_ver <= devinfo->ver; - } - - return false; -} - -static uint64_t -tiling_to_modifier(uint32_t tiling) -{ - static const uint64_t map[] = { - [I915_TILING_NONE] = DRM_FORMAT_MOD_LINEAR, - [I915_TILING_X] = I915_FORMAT_MOD_X_TILED, - [I915_TILING_Y] = I915_FORMAT_MOD_Y_TILED, - }; - - assert(tiling < ARRAY_SIZE(map)); - - return map[tiling]; -} - -static void -brw_image_warn_if_unaligned(__DRIimage *image, const char *func) -{ - uint32_t tiling, swizzle; - brw_bo_get_tiling(image->bo, &tiling, &swizzle); - - if (tiling != I915_TILING_NONE && (image->offset & 0xfff)) { - _mesa_warning(NULL, "%s: offset 0x%08x not on tile boundary", - func, image->offset); - } -} - -static const struct brw_image_format * -brw_image_format_lookup(int fourcc) -{ - for (unsigned i = 0; i < ARRAY_SIZE(brw_image_formats); i++) { - if (brw_image_formats[i].fourcc == fourcc) - return &brw_image_formats[i]; - } - - return NULL; -} - -static bool -brw_image_get_fourcc(__DRIimage *image, int *fourcc) -{ - if (image->planar_format) { - *fourcc = image->planar_format->fourcc; - return true; - } - - for (unsigned i = 0; i < ARRAY_SIZE(brw_image_formats); i++) { - if (brw_image_formats[i].planes[0].dri_format == image->dri_format) { - *fourcc = brw_image_formats[i].fourcc; - return true; - } - } - return false; -} - -static __DRIimage * -brw_allocate_image(struct brw_screen *screen, int dri_format, - void *loaderPrivate) -{ - __DRIimage *image; - - image = calloc(1, sizeof *image); - if (image == NULL) - 
return NULL; - - image->screen = screen; - image->dri_format = dri_format; - image->offset = 0; - - image->format = driImageFormatToGLFormat(dri_format); - if (dri_format != __DRI_IMAGE_FORMAT_NONE && - image->format == MESA_FORMAT_NONE) { - free(image); - return NULL; - } - - image->internal_format = _mesa_get_format_base_format(image->format); - image->driScrnPriv = screen->driScrnPriv; - image->loader_private = loaderPrivate; - - return image; -} - -/** - * Sets up a DRIImage structure to point to a slice out of a miptree. - */ -static void -brw_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image, - struct brw_mipmap_tree *mt, GLuint level, - GLuint zoffset) -{ - brw_miptree_make_shareable(brw, mt); - - brw_miptree_check_level_layer(mt, level, zoffset); - - image->width = minify(mt->surf.phys_level0_sa.width, - level - mt->first_level); - image->height = minify(mt->surf.phys_level0_sa.height, - level - mt->first_level); - image->pitch = mt->surf.row_pitch_B; - - image->offset = brw_miptree_get_tile_offsets(mt, level, zoffset, - &image->tile_x, - &image->tile_y); - - brw_bo_unreference(image->bo); - image->bo = mt->bo; - brw_bo_reference(mt->bo); -} - -static __DRIimage * -brw_create_image_from_name(__DRIscreen *dri_screen, - int width, int height, int format, - int name, int pitch, void *loaderPrivate) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - __DRIimage *image; - int cpp; - - image = brw_allocate_image(screen, format, loaderPrivate); - if (image == NULL) - return NULL; - - if (image->format == MESA_FORMAT_NONE) - cpp = 1; - else - cpp = _mesa_get_format_bytes(image->format); - - image->width = width; - image->height = height; - image->pitch = pitch * cpp; - image->bo = brw_bo_gem_create_from_name(screen->bufmgr, "image", - name); - if (!image->bo) { - free(image); - return NULL; - } - image->modifier = tiling_to_modifier(image->bo->tiling_mode); - - return image; -} - -static __DRIimage * -brw_create_image_from_renderbuffer(__DRIcontext *context, - int renderbuffer, void *loaderPrivate) -{ - __DRIimage *image; - struct brw_context *brw = context->driverPrivate; - struct gl_context *ctx = &brw->ctx; - struct gl_renderbuffer *rb; - struct brw_renderbuffer *irb; - - rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); - if (!rb) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glRenderbufferExternalMESA"); - return NULL; - } - - irb = brw_renderbuffer(rb); - brw_miptree_make_shareable(brw, irb->mt); - image = calloc(1, sizeof *image); - if (image == NULL) - return NULL; - - image->internal_format = rb->InternalFormat; - image->format = rb->Format; - image->modifier = tiling_to_modifier( - isl_tiling_to_i915_tiling(irb->mt->surf.tiling)); - image->offset = 0; - image->driScrnPriv = context->driScreenPriv; - image->loader_private = loaderPrivate; - brw_bo_unreference(image->bo); - image->bo = irb->mt->bo; - brw_bo_reference(irb->mt->bo); - image->width = rb->Width; - image->height = rb->Height; - image->pitch = irb->mt->surf.row_pitch_B; - image->dri_format = driGLFormatToImageFormat(image->format); - image->has_depthstencil = irb->mt->stencil_mt? 
true : false; - - rb->NeedsFinishRenderTexture = true; - return image; -} - -static __DRIimage * -brw_create_image_from_texture(__DRIcontext *context, int target, - unsigned texture, int zoffset, - int level, - unsigned *error, - void *loaderPrivate) -{ - __DRIimage *image; - struct brw_context *brw = context->driverPrivate; - struct gl_texture_object *obj; - struct brw_texture_object *iobj; - GLuint face = 0; - - obj = _mesa_lookup_texture(&brw->ctx, texture); - if (!obj || obj->Target != target) { - *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; - return NULL; - } - - if (target == GL_TEXTURE_CUBE_MAP) - face = zoffset; - - _mesa_test_texobj_completeness(&brw->ctx, obj); - iobj = brw_texture_object(obj); - if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { - *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; - return NULL; - } - - if (level < obj->Attrib.BaseLevel || level > obj->_MaxLevel) { - *error = __DRI_IMAGE_ERROR_BAD_MATCH; - return NULL; - } - - if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < zoffset) { - *error = __DRI_IMAGE_ERROR_BAD_MATCH; - return NULL; - } - image = calloc(1, sizeof *image); - if (image == NULL) { - *error = __DRI_IMAGE_ERROR_BAD_ALLOC; - return NULL; - } - - image->internal_format = obj->Image[face][level]->InternalFormat; - image->format = obj->Image[face][level]->TexFormat; - image->modifier = tiling_to_modifier( - isl_tiling_to_i915_tiling(iobj->mt->surf.tiling)); - image->driScrnPriv = context->driScreenPriv; - image->loader_private = loaderPrivate; - brw_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset); - image->dri_format = driGLFormatToImageFormat(image->format); - image->has_depthstencil = iobj->mt->stencil_mt? true : false; - image->planar_format = iobj->planar_format; - if (image->dri_format == __DRI_IMAGE_FORMAT_NONE) { - *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; - free(image); - return NULL; - } - - *error = __DRI_IMAGE_ERROR_SUCCESS; - return image; -} - -static void -brw_destroy_image(__DRIimage *image) -{ - const __DRIscreen * driScreen = image->driScrnPriv; - const __DRIimageLoaderExtension *imgLoader = driScreen->image.loader; - const __DRIdri2LoaderExtension *dri2Loader = driScreen->dri2.loader; - - if (imgLoader && imgLoader->base.version >= 4 && - imgLoader->destroyLoaderImageState) { - imgLoader->destroyLoaderImageState(image->loader_private); - } else if (dri2Loader && dri2Loader->base.version >= 5 && - dri2Loader->destroyLoaderImageState) { - dri2Loader->destroyLoaderImageState(image->loader_private); - } - - brw_bo_unreference(image->bo); - free(image); -} - -enum modifier_priority { - MODIFIER_PRIORITY_INVALID = 0, - MODIFIER_PRIORITY_LINEAR, - MODIFIER_PRIORITY_X, - MODIFIER_PRIORITY_Y, - MODIFIER_PRIORITY_Y_CCS, -}; - -const uint64_t priority_to_modifier[] = { - [MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID, - [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR, - [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED, - [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED, - [MODIFIER_PRIORITY_Y_CCS] = I915_FORMAT_MOD_Y_TILED_CCS, -}; - -static uint64_t -select_best_modifier(struct intel_device_info *devinfo, - int dri_format, - unsigned use, - const uint64_t *modifiers, - const unsigned count) -{ - enum modifier_priority prio = MODIFIER_PRIORITY_INVALID; - - for (int i = 0; i < count; i++) { - if (!modifier_is_supported(devinfo, NULL, dri_format, use, modifiers[i])) - continue; - - switch (modifiers[i]) { - case I915_FORMAT_MOD_Y_TILED_CCS: - prio = MAX2(prio, MODIFIER_PRIORITY_Y_CCS); - break; - 
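-      /* Plain Y-tiling is preferred over X-tiling and linear, but
-       * loses out to CCS-compressed Y-tiling above.
-       */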
case I915_FORMAT_MOD_Y_TILED: - prio = MAX2(prio, MODIFIER_PRIORITY_Y); - break; - case I915_FORMAT_MOD_X_TILED: - prio = MAX2(prio, MODIFIER_PRIORITY_X); - break; - case DRM_FORMAT_MOD_LINEAR: - prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR); - break; - case DRM_FORMAT_MOD_INVALID: - default: - break; - } - } - - return priority_to_modifier[prio]; -} - -static __DRIimage * -brw_create_image_common(__DRIscreen *dri_screen, - int width, int height, int format, - unsigned int use, - const uint64_t *modifiers, - unsigned count, - void *loaderPrivate) -{ - __DRIimage *image; - struct brw_screen *screen = dri_screen->driverPrivate; - uint64_t modifier = DRM_FORMAT_MOD_INVALID; - bool ok; - - if (use & __DRI_IMAGE_USE_CURSOR) { - if (width != 64 || height != 64) - return NULL; - modifier = DRM_FORMAT_MOD_LINEAR; - } - - if (use & __DRI_IMAGE_USE_LINEAR) - modifier = DRM_FORMAT_MOD_LINEAR; - - if (modifier == DRM_FORMAT_MOD_INVALID) { - if (modifiers) { - /* User requested specific modifiers */ - modifier = select_best_modifier(&screen->devinfo, format, use, - modifiers, count); - if (modifier == DRM_FORMAT_MOD_INVALID) - return NULL; - } else { - /* Historically, X-tiled was the default, and so lack of modifier means - * X-tiled. - */ - modifier = I915_FORMAT_MOD_X_TILED; - } - } - - image = brw_allocate_image(screen, format, loaderPrivate); - if (image == NULL) - return NULL; - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(modifier); - - struct isl_surf surf; - ok = isl_surf_init(&screen->isl_dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = brw_isl_format_for_mesa_format(image->format), - .width = width, - .height = height, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_STORAGE_BIT | - ((use & __DRI_IMAGE_USE_SCANOUT) ? - ISL_SURF_USAGE_DISPLAY_BIT : 0), - .tiling_flags = (1 << mod_info->tiling)); - assert(ok); - if (!ok) { - free(image); - return NULL; - } - - struct isl_surf aux_surf = {0,}; - if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) { - ok = isl_surf_get_ccs_surf(&screen->isl_dev, &surf, NULL, &aux_surf, 0); - if (!ok) { - free(image); - return NULL; - } - } else { - assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE); - aux_surf.size_B = 0; - } - - /* We request that the bufmgr zero the buffer for us for two reasons: - * - * 1) If a buffer gets re-used from the pool, we don't want to leak random - * garbage from our process to some other. - * - * 2) For images with CCS_E, we want to ensure that the CCS starts off in - * a valid state. A CCS value of 0 indicates that the given block is - * in the pass-through state which is what we want. 
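-    * (A block in the pass-through state is read directly from the main
-    * surface, ignoring the CCS, so an all-zero CCS is always consistent
-    * with the freshly zeroed main surface.)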
- */ - image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image", - surf.size_B + aux_surf.size_B, - BRW_MEMZONE_OTHER, - isl_tiling_to_i915_tiling(mod_info->tiling), - surf.row_pitch_B, BO_ALLOC_ZEROED); - if (image->bo == NULL) { - free(image); - return NULL; - } - image->width = width; - image->height = height; - image->pitch = surf.row_pitch_B; - image->modifier = modifier; - - if (aux_surf.size_B) { - image->aux_offset = surf.size_B; - image->aux_pitch = aux_surf.row_pitch_B; - image->aux_size = aux_surf.size_B; - } - - return image; -} - -static __DRIimage * -brw_create_image(__DRIscreen *dri_screen, - int width, int height, int format, - unsigned int use, - void *loaderPrivate) -{ - return brw_create_image_common(dri_screen, width, height, format, use, - NULL, 0, loaderPrivate); -} - -static void * -brw_map_image(__DRIcontext *context, __DRIimage *image, - int x0, int y0, int width, int height, - unsigned int flags, int *stride, void **map_info) -{ - struct brw_context *brw = NULL; - struct brw_bo *bo = NULL; - void *raw_data = NULL; - GLuint pix_w = 1; - GLuint pix_h = 1; - GLint pix_bytes = 1; - - if (!context || !image || !stride || !map_info || *map_info) - return NULL; - - if (x0 < 0 || x0 >= image->width || width > image->width - x0) - return NULL; - - if (y0 < 0 || y0 >= image->height || height > image->height - y0) - return NULL; - - if (flags & MAP_INTERNAL_MASK) - return NULL; - - brw = context->driverPrivate; - bo = image->bo; - - assert(brw); - assert(bo); - - /* DRI flags and GL_MAP.*_BIT flags are the same, so just pass them on. */ - raw_data = brw_bo_map(brw, bo, flags); - if (!raw_data) - return NULL; - - _mesa_get_format_block_size(image->format, &pix_w, &pix_h); - pix_bytes = _mesa_get_format_bytes(image->format); - - assert(pix_w); - assert(pix_h); - assert(pix_bytes > 0); - - raw_data += (x0 / pix_w) * pix_bytes + (y0 / pix_h) * image->pitch; - - brw_bo_reference(bo); - - *stride = image->pitch; - *map_info = bo; - - return raw_data; -} - -static void -brw_unmap_image(UNUSED __DRIcontext *context, UNUSED __DRIimage *image, - void *map_info) -{ - struct brw_bo *bo = map_info; - - brw_bo_unmap(bo); - brw_bo_unreference(bo); -} - -static __DRIimage * -brw_create_image_with_modifiers(__DRIscreen *dri_screen, - int width, int height, int format, - const uint64_t *modifiers, - const unsigned count, - void *loaderPrivate) -{ - return brw_create_image_common(dri_screen, width, height, format, 0, - modifiers, count, loaderPrivate); -} - -static __DRIimage * -brw_create_image_with_modifiers2(__DRIscreen *dri_screen, - int width, int height, int format, - const uint64_t *modifiers, - const unsigned count, unsigned int use, - void *loaderPrivate) -{ - return brw_create_image_common(dri_screen, width, height, format, use, - modifiers, count, loaderPrivate); -} - -static GLboolean -brw_query_image(__DRIimage *image, int attrib, int *value) -{ - switch (attrib) { - case __DRI_IMAGE_ATTRIB_STRIDE: - *value = image->pitch; - return true; - case __DRI_IMAGE_ATTRIB_HANDLE: { - __DRIscreen *dri_screen = image->screen->driScrnPriv; - uint32_t handle; - if (brw_bo_export_gem_handle_for_device(image->bo, - dri_screen->fd, - &handle)) - return false; - *value = handle; - return true; - } - case __DRI_IMAGE_ATTRIB_NAME: - return !brw_bo_flink(image->bo, (uint32_t *) value); - case __DRI_IMAGE_ATTRIB_FORMAT: - *value = image->dri_format; - return true; - case __DRI_IMAGE_ATTRIB_WIDTH: - *value = image->width; - return true; - case __DRI_IMAGE_ATTRIB_HEIGHT: - *value = image->height; - return 
true; - case __DRI_IMAGE_ATTRIB_COMPONENTS: - if (image->planar_format == NULL) - return false; - *value = image->planar_format->components; - return true; - case __DRI_IMAGE_ATTRIB_FD: - return !brw_bo_gem_export_to_prime(image->bo, value); - case __DRI_IMAGE_ATTRIB_FOURCC: - return brw_image_get_fourcc(image, value); - case __DRI_IMAGE_ATTRIB_NUM_PLANES: - if (isl_drm_modifier_has_aux(image->modifier)) { - assert(!image->planar_format || image->planar_format->nplanes == 1); - *value = 2; - } else if (image->planar_format) { - *value = image->planar_format->nplanes; - } else { - *value = 1; - } - return true; - case __DRI_IMAGE_ATTRIB_OFFSET: - *value = image->offset; - return true; - case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER: - *value = (image->modifier & 0xffffffff); - return true; - case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER: - *value = ((image->modifier >> 32) & 0xffffffff); - return true; - - default: - return false; - } -} - -static GLboolean -brw_query_format_modifier_attribs(__DRIscreen *dri_screen, - uint32_t fourcc, uint64_t modifier, - int attrib, uint64_t *value) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - const struct brw_image_format *f = brw_image_format_lookup(fourcc); - - if (!modifier_is_supported(&screen->devinfo, f, 0, 0, modifier)) - return false; - - switch (attrib) { - case __DRI_IMAGE_FORMAT_MODIFIER_ATTRIB_PLANE_COUNT: - *value = isl_drm_modifier_has_aux(modifier) ? 2 : f->nplanes; - return true; - - default: - return false; - } -} - -static __DRIimage * -brw_dup_image(__DRIimage *orig_image, void *loaderPrivate) -{ - __DRIimage *image; - - image = calloc(1, sizeof *image); - if (image == NULL) - return NULL; - - brw_bo_reference(orig_image->bo); - image->screen = orig_image->screen; - image->bo = orig_image->bo; - image->internal_format = orig_image->internal_format; - image->planar_format = orig_image->planar_format; - image->dri_format = orig_image->dri_format; - image->format = orig_image->format; - image->modifier = orig_image->modifier; - image->offset = orig_image->offset; - image->width = orig_image->width; - image->height = orig_image->height; - image->pitch = orig_image->pitch; - image->tile_x = orig_image->tile_x; - image->tile_y = orig_image->tile_y; - image->has_depthstencil = orig_image->has_depthstencil; - image->driScrnPriv = orig_image->driScrnPriv; - image->loader_private = loaderPrivate; - image->aux_offset = orig_image->aux_offset; - image->aux_pitch = orig_image->aux_pitch; - - memcpy(image->strides, orig_image->strides, sizeof(image->strides)); - memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets)); - - return image; -} - -static GLboolean -brw_validate_usage(__DRIimage *image, unsigned int use) -{ - if (use & __DRI_IMAGE_USE_CURSOR) { - if (image->width != 64 || image->height != 64) - return GL_FALSE; - } - - return GL_TRUE; -} - -static __DRIimage * -brw_create_image_from_names(__DRIscreen *dri_screen, - int width, int height, int fourcc, - int *names, int num_names, - int *strides, int *offsets, - void *loaderPrivate) -{ - const struct brw_image_format *f = NULL; - __DRIimage *image; - int i, index; - - if (dri_screen == NULL || names == NULL || num_names != 1) - return NULL; - - f = brw_image_format_lookup(fourcc); - if (f == NULL) - return NULL; - - image = brw_create_image_from_name(dri_screen, width, height, - __DRI_IMAGE_FORMAT_NONE, - names[0], strides[0], - loaderPrivate); - - if (image == NULL) - return NULL; - - image->planar_format = f; - for (i = 0; i < f->nplanes; i++) { - index = 
f->planes[i].buffer_index; - image->offsets[index] = offsets[index]; - image->strides[index] = strides[index]; - } - - return image; -} - -static __DRIimage * -brw_create_image_from_fds_common(__DRIscreen *dri_screen, - int width, int height, int fourcc, - uint64_t modifier, int *fds, int num_fds, - int *strides, int *offsets, - void *loaderPrivate) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - const struct brw_image_format *f; - __DRIimage *image; - int i, index; - bool ok; - - if (fds == NULL || num_fds < 1) - return NULL; - - f = brw_image_format_lookup(fourcc); - if (f == NULL) - return NULL; - - if (modifier != DRM_FORMAT_MOD_INVALID && - !modifier_is_supported(&screen->devinfo, f, 0, 0, modifier)) - return NULL; - - if (f->nplanes == 1) - image = brw_allocate_image(screen, f->planes[0].dri_format, - loaderPrivate); - else - image = brw_allocate_image(screen, __DRI_IMAGE_FORMAT_NONE, - loaderPrivate); - - if (image == NULL) - return NULL; - - image->width = width; - image->height = height; - image->pitch = strides[0]; - - image->planar_format = f; - - if (modifier != DRM_FORMAT_MOD_INVALID) { - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(modifier); - uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling); - image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr, fds[0], - tiling, strides[0]); - } else { - image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]); - } - - if (image->bo == NULL) { - free(image); - return NULL; - } - - /* We only support all planes from the same bo. - * brw_bo_gem_create_from_prime() should return the same pointer for all - * fds received here */ - for (i = 1; i < num_fds; i++) { - struct brw_bo *aux = brw_bo_gem_create_from_prime(screen->bufmgr, fds[i]); - brw_bo_unreference(aux); - if (aux != image->bo) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - } - - if (modifier != DRM_FORMAT_MOD_INVALID) - image->modifier = modifier; - else - image->modifier = tiling_to_modifier(image->bo->tiling_mode); - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(image->modifier); - - int size = 0; - struct isl_surf surf; - for (i = 0; i < f->nplanes; i++) { - index = f->planes[i].buffer_index; - image->offsets[index] = offsets[index]; - image->strides[index] = strides[index]; - - mesa_format format = driImageFormatToGLFormat(f->planes[i].dri_format); - /* The images we will create are actually based on the RGBA non-sRGB - * version of the format. - */ - format = _mesa_format_fallback_rgbx_to_rgba(format); - format = _mesa_get_srgb_format_linear(format); - - ok = isl_surf_init(&screen->isl_dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = brw_isl_format_for_mesa_format(format), - .width = image->width >> f->planes[i].width_shift, - .height = image->height >> f->planes[i].height_shift, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .row_pitch_B = strides[index], - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_STORAGE_BIT, - .tiling_flags = (1 << mod_info->tiling)); - if (!ok) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - const int end = offsets[index] + surf.size_B; - if (size < end) - size = end; - } - - if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) { - /* Even though we initialize surf in the loop above, we know that - * anything with CCS_E will have exactly one plane so surf is properly - * initialized when we get here. 
- */ - assert(f->nplanes == 1); - - image->aux_offset = offsets[1]; - image->aux_pitch = strides[1]; - - /* Scanout hardware requires that the CCS be placed after the main - * surface in memory. We consider any CCS that is placed any earlier in - * memory to be invalid and reject it. - * - * At some point in the future, this restriction may be relaxed if the - * hardware becomes less strict but we may need a new modifier for that. - */ - assert(size > 0); - if (image->aux_offset < size) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - struct isl_surf aux_surf = {0,}; - ok = isl_surf_get_ccs_surf(&screen->isl_dev, &surf, NULL, &aux_surf, - image->aux_pitch); - if (!ok) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - image->aux_size = aux_surf.size_B; - - const int end = image->aux_offset + aux_surf.size_B; - if (size < end) - size = end; - } else { - assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE); - } - - /* Check that the requested image actually fits within the BO. 'size' - * is already relative to the offsets, so we don't need to add that. */ - if (image->bo->size == 0) { - image->bo->size = size; - } else if (size > image->bo->size) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - if (f->nplanes == 1) { - image->offset = image->offsets[0]; - brw_image_warn_if_unaligned(image, __func__); - } - - return image; -} - -static __DRIimage * -brw_create_image_from_fds(__DRIscreen *dri_screen, - int width, int height, int fourcc, - int *fds, int num_fds, int *strides, int *offsets, - void *loaderPrivate) -{ - return brw_create_image_from_fds_common(dri_screen, width, height, fourcc, - DRM_FORMAT_MOD_INVALID, - fds, num_fds, strides, offsets, - loaderPrivate); -} - -static __DRIimage * -brw_create_image_from_dma_bufs2(__DRIscreen *dri_screen, - int width, int height, - int fourcc, uint64_t modifier, - int *fds, int num_fds, - int *strides, int *offsets, - enum __DRIYUVColorSpace yuv_color_space, - enum __DRISampleRange sample_range, - enum __DRIChromaSiting horizontal_siting, - enum __DRIChromaSiting vertical_siting, - unsigned *error, - void *loaderPrivate) -{ - __DRIimage *image; - const struct brw_image_format *f = brw_image_format_lookup(fourcc); - - if (!f) { - *error = __DRI_IMAGE_ERROR_BAD_MATCH; - return NULL; - } - - image = brw_create_image_from_fds_common(dri_screen, width, height, - fourcc, modifier, - fds, num_fds, strides, offsets, - loaderPrivate); - - /* - * Invalid parameters and any inconsistencies between are assumed to be - * checked by the caller. Therefore besides unsupported formats one can fail - * only in allocation. 
- */
-   if (!image) {
-      *error = __DRI_IMAGE_ERROR_BAD_ALLOC;
-      return NULL;
-   }
-
-   image->yuv_color_space = yuv_color_space;
-   image->sample_range = sample_range;
-   image->horizontal_siting = horizontal_siting;
-   image->vertical_siting = vertical_siting;
-   image->imported_dmabuf = true;
-
-   *error = __DRI_IMAGE_ERROR_SUCCESS;
-   return image;
-}
-
-static __DRIimage *
-brw_create_image_from_dma_bufs(__DRIscreen *dri_screen,
-                               int width, int height, int fourcc,
-                               int *fds, int num_fds,
-                               int *strides, int *offsets,
-                               enum __DRIYUVColorSpace yuv_color_space,
-                               enum __DRISampleRange sample_range,
-                               enum __DRIChromaSiting horizontal_siting,
-                               enum __DRIChromaSiting vertical_siting,
-                               unsigned *error,
-                               void *loaderPrivate)
-{
-   return brw_create_image_from_dma_bufs2(dri_screen, width, height,
-                                          fourcc, DRM_FORMAT_MOD_INVALID,
-                                          fds, num_fds, strides, offsets,
-                                          yuv_color_space,
-                                          sample_range,
-                                          horizontal_siting,
-                                          vertical_siting,
-                                          error,
-                                          loaderPrivate);
-}
-
-static bool
-brw_image_format_is_supported(const struct intel_device_info *devinfo,
-                              const struct brw_image_format *fmt)
-{
-   /* Currently, all formats with a brw_image_format are available on all
-    * platforms so there's really nothing to check there.
-    */
-
-#ifndef NDEBUG
-   if (fmt->nplanes == 1) {
-      mesa_format format = driImageFormatToGLFormat(fmt->planes[0].dri_format);
-      /* The images we will create are actually based on the RGBA non-sRGB
-       * version of the format.
-       */
-      format = _mesa_format_fallback_rgbx_to_rgba(format);
-      format = _mesa_get_srgb_format_linear(format);
-      enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
-      assert(isl_format_supports_rendering(devinfo, isl_format));
-   }
-#endif
-
-   return true;
-}
-
-static GLboolean
-brw_query_dma_buf_formats(__DRIscreen *_screen, int max,
-                          int *formats, int *count)
-{
-   struct brw_screen *screen = _screen->driverPrivate;
-   int num_formats = 0, i;
-
-   for (i = 0; i < ARRAY_SIZE(brw_image_formats); i++) {
-      /* These formats are valid DRI formats but do not exist in drm_fourcc.h
-       * in the Linux kernel. We don't want to accidentally advertise them
-       * through the EGL layer. 
- */ - if (brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SARGB8888 || - brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SABGR8888 || - brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SXRGB8888) - continue; - - if (!brw_image_format_is_supported(&screen->devinfo, - &brw_image_formats[i])) - continue; - - num_formats++; - if (max == 0) - continue; - - formats[num_formats - 1] = brw_image_formats[i].fourcc; - if (num_formats >= max) - break; - } - - *count = num_formats; - return true; -} - -static GLboolean -brw_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max, - uint64_t *modifiers, - unsigned int *external_only, - int *count) -{ - struct brw_screen *screen = _screen->driverPrivate; - const struct brw_image_format *f; - int num_mods = 0, i; - - f = brw_image_format_lookup(fourcc); - if (f == NULL) - return false; - - if (!brw_image_format_is_supported(&screen->devinfo, f)) - return false; - - for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) { - uint64_t modifier = supported_modifiers[i].modifier; - if (!modifier_is_supported(&screen->devinfo, f, 0, 0, modifier)) - continue; - - num_mods++; - if (max == 0) - continue; - - modifiers[num_mods - 1] = modifier; - if (num_mods >= max) - break; - } - - if (external_only != NULL) { - for (i = 0; i < num_mods && i < max; i++) { - if (f->components == __DRI_IMAGE_COMPONENTS_Y_U_V || - f->components == __DRI_IMAGE_COMPONENTS_Y_UV || - f->components == __DRI_IMAGE_COMPONENTS_AYUV || - f->components == __DRI_IMAGE_COMPONENTS_XYUV || - f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV || - f->components == __DRI_IMAGE_COMPONENTS_Y_UXVX) { - external_only[i] = GL_TRUE; - } - else { - external_only[i] = GL_FALSE; - } - } - } - - *count = num_mods; - return true; -} - -static __DRIimage * -brw_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) -{ - int width, height, offset, stride, size, dri_format; - __DRIimage *image; - - if (parent == NULL) - return NULL; - - width = parent->width; - height = parent->height; - - const struct brw_image_format *f = parent->planar_format; - - if (f && plane < f->nplanes) { - /* Use the planar format definition. */ - width >>= f->planes[plane].width_shift; - height >>= f->planes[plane].height_shift; - dri_format = f->planes[plane].dri_format; - int index = f->planes[plane].buffer_index; - offset = parent->offsets[index]; - stride = parent->strides[index]; - size = height * stride; - } else if (plane == 0) { - /* The only plane of a non-planar image: copy the parent definition - * directly. 
*/ - dri_format = parent->dri_format; - offset = parent->offset; - stride = parent->pitch; - size = height * stride; - } else if (plane == 1 && parent->modifier != DRM_FORMAT_MOD_INVALID && - isl_drm_modifier_has_aux(parent->modifier)) { - /* Auxiliary plane */ - dri_format = parent->dri_format; - offset = parent->aux_offset; - stride = parent->aux_pitch; - size = parent->aux_size; - } else { - return NULL; - } - - if (offset + size > parent->bo->size) { - _mesa_warning(NULL, "intel_from_planar: subimage out of bounds"); - return NULL; - } - - image = brw_allocate_image(parent->screen, dri_format, loaderPrivate); - if (image == NULL) - return NULL; - - image->bo = parent->bo; - brw_bo_reference(parent->bo); - image->modifier = parent->modifier; - - image->width = width; - image->height = height; - image->pitch = stride; - image->offset = offset; - - brw_image_warn_if_unaligned(image, __func__); - - return image; -} - -static const __DRIimageExtension brwImageExtension = { - .base = { __DRI_IMAGE, 19 }, - - .createImageFromName = brw_create_image_from_name, - .createImageFromRenderbuffer = brw_create_image_from_renderbuffer, - .destroyImage = brw_destroy_image, - .createImage = brw_create_image, - .queryImage = brw_query_image, - .dupImage = brw_dup_image, - .validateUsage = brw_validate_usage, - .createImageFromNames = brw_create_image_from_names, - .fromPlanar = brw_from_planar, - .createImageFromTexture = brw_create_image_from_texture, - .createImageFromFds = brw_create_image_from_fds, - .createImageFromDmaBufs = brw_create_image_from_dma_bufs, - .blitImage = NULL, - .getCapabilities = NULL, - .mapImage = brw_map_image, - .unmapImage = brw_unmap_image, - .createImageWithModifiers = brw_create_image_with_modifiers, - .createImageFromDmaBufs2 = brw_create_image_from_dma_bufs2, - .queryDmaBufFormats = brw_query_dma_buf_formats, - .queryDmaBufModifiers = brw_query_dma_buf_modifiers, - .queryDmaBufFormatModifierAttribs = brw_query_format_modifier_attribs, - .createImageWithModifiers2 = brw_create_image_with_modifiers2, -}; - -static int -brw_query_renderer_integer(__DRIscreen *dri_screen, - int param, unsigned int *value) -{ - const struct brw_screen *const screen = - (struct brw_screen *) dri_screen->driverPrivate; - - switch (param) { - case __DRI2_RENDERER_VENDOR_ID: - value[0] = 0x8086; - return 0; - case __DRI2_RENDERER_DEVICE_ID: - value[0] = screen->deviceID; - return 0; - case __DRI2_RENDERER_ACCELERATED: - value[0] = 1; - return 0; - case __DRI2_RENDERER_VIDEO_MEMORY: { - /* Once a batch uses more than 75% of the maximum mappable size, we - * assume that there's some fragmentation, and we start doing extra - * flushing, etc. That's the big cliff apps will care about. 
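-       * We therefore report the smaller of that mappable threshold and
-       * the total system RAM, in megabytes.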
- */ - const unsigned gpu_mappable_megabytes = - screen->aperture_threshold / (1024 * 1024); - - const long system_memory_pages = sysconf(_SC_PHYS_PAGES); - const long system_page_size = sysconf(_SC_PAGE_SIZE); - - if (system_memory_pages <= 0 || system_page_size <= 0) - return -1; - - const uint64_t system_memory_bytes = (uint64_t) system_memory_pages - * (uint64_t) system_page_size; - - const unsigned system_memory_megabytes = - (unsigned) (system_memory_bytes / (1024 * 1024)); - - value[0] = MIN2(system_memory_megabytes, gpu_mappable_megabytes); - return 0; - } - case __DRI2_RENDERER_UNIFIED_MEMORY_ARCHITECTURE: - value[0] = 1; - return 0; - case __DRI2_RENDERER_HAS_TEXTURE_3D: - value[0] = 1; - return 0; - case __DRI2_RENDERER_HAS_CONTEXT_PRIORITY: - value[0] = 0; - if (brw_hw_context_set_priority(screen->bufmgr, - 0, INTEL_CONTEXT_HIGH_PRIORITY) == 0) - value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH; - if (brw_hw_context_set_priority(screen->bufmgr, - 0, INTEL_CONTEXT_LOW_PRIORITY) == 0) - value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_LOW; - /* reset to default last, just in case */ - if (brw_hw_context_set_priority(screen->bufmgr, - 0, INTEL_CONTEXT_MEDIUM_PRIORITY) == 0) - value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM; - return 0; - case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB: - value[0] = 1; - return 0; - default: - return driQueryRendererIntegerCommon(dri_screen, param, value); - } - - return -1; -} - -static int -brw_query_renderer_string(__DRIscreen *dri_screen, - int param, const char **value) -{ - const struct brw_screen *screen = - (struct brw_screen *) dri_screen->driverPrivate; - - switch (param) { - case __DRI2_RENDERER_VENDOR_ID: - value[0] = brw_vendor_string; - return 0; - case __DRI2_RENDERER_DEVICE_ID: - value[0] = brw_get_renderer_string(screen); - return 0; - default: - break; - } - - return -1; -} - -static void -brw_set_cache_funcs(__DRIscreen *dri_screen, - __DRIblobCacheSet set, __DRIblobCacheGet get) -{ - const struct brw_screen *const screen = - (struct brw_screen *) dri_screen->driverPrivate; - - if (!screen->disk_cache) - return; - - disk_cache_set_callbacks(screen->disk_cache, set, get); -} - -static const __DRI2rendererQueryExtension brwRendererQueryExtension = { - .base = { __DRI2_RENDERER_QUERY, 1 }, - - .queryInteger = brw_query_renderer_integer, - .queryString = brw_query_renderer_string -}; - -static const __DRIrobustnessExtension dri2Robustness = { - .base = { __DRI2_ROBUSTNESS, 1 } -}; - -static const __DRI2blobExtension brwBlobExtension = { - .base = { __DRI2_BLOB, 1 }, - .set_cache_funcs = brw_set_cache_funcs -}; - -static const __DRImutableRenderBufferDriverExtension brwMutableRenderBufferExtension = { - .base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 }, -}; - -static const __DRIextension *screenExtensions[] = { - &brwTexBufferExtension.base, - &brwFenceExtension.base, - &brwFlushExtension.base, - &brwImageExtension.base, - &brwRendererQueryExtension.base, - &brwMutableRenderBufferExtension.base, - &dri2ConfigQueryExtension.base, - &dri2NoErrorExtension.base, - &brwBlobExtension.base, - NULL -}; - -static const __DRIextension *brwRobustScreenExtensions[] = { - &brwTexBufferExtension.base, - &brwFenceExtension.base, - &brwFlushExtension.base, - &brwImageExtension.base, - &brwRendererQueryExtension.base, - &brwMutableRenderBufferExtension.base, - &dri2ConfigQueryExtension.base, - &dri2Robustness.base, - &dri2NoErrorExtension.base, - &brwBlobExtension.base, - NULL -}; - -static int -brw_get_param(struct brw_screen *screen, int 
param, int *value)
-{
-   int ret = 0;
-   struct drm_i915_getparam gp;
-
-   memset(&gp, 0, sizeof(gp));
-   gp.param = param;
-   gp.value = value;
-
-   if (drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1) {
-      ret = -errno;
-      if (ret != -EINVAL)
-         _mesa_warning(NULL, "drm_i915_getparam: %d", ret);
-   }
-
-   return ret;
-}
-
-static bool
-brw_get_boolean(struct brw_screen *screen, int param)
-{
-   int value = 0;
-   return (brw_get_param(screen, param, &value) == 0) && value;
-}
-
-static int
-brw_get_integer(struct brw_screen *screen, int param)
-{
-   int value = -1;
-
-   if (brw_get_param(screen, param, &value) == 0)
-      return value;
-
-   return -1;
-}
-
-static void
-brw_destroy_screen(__DRIscreen *sPriv)
-{
-   struct brw_screen *screen = sPriv->driverPrivate;
-
-   brw_bufmgr_unref(screen->bufmgr);
-   driDestroyOptionInfo(&screen->optionCache);
-
-   disk_cache_destroy(screen->disk_cache);
-
-   ralloc_free(screen);
-   sPriv->driverPrivate = NULL;
-}
-
-
-/**
- * Create a gl_framebuffer and attach it to __DRIdrawable::driverPrivate.
- *
- * This implements driDriverAPI::createNewDrawable, which the DRI layer calls
- * when creating an EGLSurface, GLXDrawable, or GLXPixmap. Despite the name,
- * this does not allocate GPU memory.
- */
-static GLboolean
-brw_create_buffer(__DRIscreen *dri_screen,
-                  __DRIdrawable *driDrawPriv,
-                  const struct gl_config *mesaVis, GLboolean isPixmap)
-{
-   struct brw_renderbuffer *rb;
-   struct brw_screen *screen = (struct brw_screen *)
-      dri_screen->driverPrivate;
-   mesa_format rgbFormat;
-   unsigned num_samples =
-      brw_quantize_num_samples(screen, mesaVis->samples);
-
-   if (isPixmap)
-      return false;
-
-   struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer);
-   if (!fb)
-      return false;
-
-   _mesa_initialize_window_framebuffer(fb, mesaVis);
-
-   if (screen->winsys_msaa_samples_override != -1) {
-      num_samples = screen->winsys_msaa_samples_override;
-      fb->Visual.samples = num_samples;
-   }
-
-   if (mesaVis->redBits == 16 && mesaVis->alphaBits > 0 && mesaVis->floatMode) {
-      rgbFormat = MESA_FORMAT_RGBA_FLOAT16;
-   } else if (mesaVis->redBits == 16 && mesaVis->floatMode) {
-      rgbFormat = MESA_FORMAT_RGBX_FLOAT16;
-   } else if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) {
-      rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10A2_UNORM
-                                                 : MESA_FORMAT_R10G10B10A2_UNORM;
-   } else if (mesaVis->redBits == 10) {
-      rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10X2_UNORM
-                                                 : MESA_FORMAT_R10G10B10X2_UNORM;
-   } else if (mesaVis->redBits == 5) {
-      rgbFormat = mesaVis->redMask == 0x1f ? MESA_FORMAT_R5G6B5_UNORM
-                                           : MESA_FORMAT_B5G6R5_UNORM;
-   } else if (mesaVis->alphaBits == 0) {
-      rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8X8_SRGB
-                                           : MESA_FORMAT_B8G8R8X8_SRGB;
-      fb->Visual.sRGBCapable = true;
-   } else if (mesaVis->sRGBCapable) {
-      rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB
-                                           : MESA_FORMAT_B8G8R8A8_SRGB;
-      fb->Visual.sRGBCapable = true;
-   } else {
-      rgbFormat = mesaVis->redMask == 0xff ? 
MESA_FORMAT_R8G8B8A8_SRGB - : MESA_FORMAT_B8G8R8A8_SRGB; - fb->Visual.sRGBCapable = true; - } - - /* mesaVis->sRGBCapable was set, user is asking for sRGB */ - bool srgb_cap_set = mesaVis->redBits >= 8 && mesaVis->sRGBCapable; - - /* setup the hardware-based renderbuffers */ - rb = brw_create_winsys_renderbuffer(screen, rgbFormat, num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_FRONT_LEFT, &rb->Base.Base); - rb->need_srgb = srgb_cap_set; - - if (mesaVis->doubleBufferMode) { - rb = brw_create_winsys_renderbuffer(screen, rgbFormat, num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_BACK_LEFT, &rb->Base.Base); - rb->need_srgb = srgb_cap_set; - } - - /* - * Assert here that the gl_config has an expected depth/stencil bit - * combination: one of d24/s8, d16/s0, d0/s0. (See brw_init_screen(), - * which constructs the advertised configs.) - */ - if (mesaVis->depthBits == 24) { - assert(mesaVis->stencilBits == 8); - - if (screen->devinfo.has_hiz_and_separate_stencil) { - rb = brw_create_private_renderbuffer(screen, - MESA_FORMAT_Z24_UNORM_X8_UINT, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); - rb = brw_create_private_renderbuffer(screen, MESA_FORMAT_S_UINT8, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_STENCIL, &rb->Base.Base); - } else { - /* - * Use combined depth/stencil. Note that the renderbuffer is - * attached to two attachment points. - */ - rb = brw_create_private_renderbuffer(screen, - MESA_FORMAT_Z24_UNORM_S8_UINT, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); - _mesa_attach_and_reference_rb(fb, BUFFER_STENCIL, &rb->Base.Base); - } - } - else if (mesaVis->depthBits == 16) { - assert(mesaVis->stencilBits == 0); - rb = brw_create_private_renderbuffer(screen, MESA_FORMAT_Z_UNORM16, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); - } - else { - assert(mesaVis->depthBits == 0); - assert(mesaVis->stencilBits == 0); - } - - /* now add any/all software-based renderbuffers we may need */ - _swrast_add_soft_renderbuffers(fb, - false, /* never sw color */ - false, /* never sw depth */ - false, /* never sw stencil */ - mesaVis->accumRedBits > 0, - false /* never sw alpha */); - driDrawPriv->driverPrivate = fb; - - return true; -} - -static void -brw_destroy_buffer(__DRIdrawable *driDrawPriv) -{ - struct gl_framebuffer *fb = driDrawPriv->driverPrivate; - - _mesa_reference_framebuffer(&fb, NULL); -} - -static bool -brw_init_bufmgr(struct brw_screen *screen) -{ - __DRIscreen *dri_screen = screen->driScrnPriv; - - bool bo_reuse = false; - int bo_reuse_mode = driQueryOptioni(&screen->optionCache, "bo_reuse"); - switch (bo_reuse_mode) { - case DRI_CONF_BO_REUSE_DISABLED: - break; - case DRI_CONF_BO_REUSE_ALL: - bo_reuse = true; - break; - } - - screen->bufmgr = brw_bufmgr_get_for_fd(&screen->devinfo, dri_screen->fd, bo_reuse); - if (screen->bufmgr == NULL) { - fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", - __func__, __LINE__); - return false; - } - screen->fd = brw_bufmgr_get_fd(screen->bufmgr); - - if (!brw_get_boolean(screen, I915_PARAM_HAS_EXEC_NO_RELOC)) { - fprintf(stderr, "[%s: %u] Kernel 3.9 required.\n", __func__, __LINE__); - return false; - } - - return true; -} - -static int -brw_detect_timestamp(struct brw_screen *screen) -{ - uint64_t dummy = 0, last = 0; - int upper, lower, loops; - - /* On 64bit systems, some old kernels trigger a hw bug resulting in the - * TIMESTAMP register being shifted and the low 32bits always zero. 
- * - * More recent kernels offer an interface to read the full 36bits - * everywhere. - */ - if (brw_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0) - return 3; - - /* Determine if we have a 32bit or 64bit kernel by inspecting the - * upper 32bits for a rapidly changing timestamp. - */ - if (brw_reg_read(screen->bufmgr, TIMESTAMP, &last)) - return 0; - - upper = lower = 0; - for (loops = 0; loops < 10; loops++) { - /* The TIMESTAMP should change every 80ns, so several round trips - * through the kernel should be enough to advance it. - */ - if (brw_reg_read(screen->bufmgr, TIMESTAMP, &dummy)) - return 0; - - upper += (dummy >> 32) != (last >> 32); - if (upper > 1) /* beware 32bit counter overflow */ - return 2; /* upper dword holds the low 32bits of the timestamp */ - - lower += (dummy & 0xffffffff) != (last & 0xffffffff); - if (lower > 1) - return 1; /* timestamp is unshifted */ - - last = dummy; - } - - /* No advancement? No timestamp! */ - return 0; -} - - /** - * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer. - * - * Some combinations of hardware and kernel versions allow this feature, - * while others don't. Instead of trying to enumerate every case, just - * try and write a register and see if works. - */ -static bool -brw_detect_pipelined_register(struct brw_screen *screen, - int reg, uint32_t expected_value, bool reset) -{ - if (screen->devinfo.no_hw) - return false; - - struct brw_bo *results, *bo; - uint32_t *batch; - uint32_t offset = 0; - void *map; - bool success = false; - - /* Create a zero'ed temporary buffer for reading our results */ - results = brw_bo_alloc(screen->bufmgr, "registers", 4096, BRW_MEMZONE_OTHER); - if (results == NULL) - goto err; - - bo = brw_bo_alloc(screen->bufmgr, "batchbuffer", 4096, BRW_MEMZONE_OTHER); - if (bo == NULL) - goto err_results; - - map = brw_bo_map(NULL, bo, MAP_WRITE); - if (!map) - goto err_batch; - - batch = map; - - /* Write the register. */ - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = expected_value; - - /* Save the register's value back to the buffer. */ - *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); - *batch++ = reg; - struct drm_i915_gem_relocation_entry reloc = { - .offset = (char *) batch - (char *) map, - .delta = offset * sizeof(uint32_t), - .target_handle = results->gem_handle, - .read_domains = I915_GEM_DOMAIN_INSTRUCTION, - .write_domain = I915_GEM_DOMAIN_INSTRUCTION, - }; - *batch++ = reloc.presumed_offset + reloc.delta; - - /* And afterwards clear the register */ - if (reset) { - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = 0; - } - - *batch++ = MI_BATCH_BUFFER_END; - - struct drm_i915_gem_exec_object2 exec_objects[2] = { - { - .handle = results->gem_handle, - }, - { - .handle = bo->gem_handle, - .relocation_count = 1, - .relocs_ptr = (uintptr_t) &reloc, - } - }; - - struct drm_i915_gem_execbuffer2 execbuf = { - .buffers_ptr = (uintptr_t) exec_objects, - .buffer_count = 2, - .batch_len = ALIGN((char *) batch - (char *) map, 8), - .flags = I915_EXEC_RENDER, - }; - - /* Don't bother with error checking - if the execbuf fails, the - * value won't be written and we'll just report that there's no access. - */ - drmIoctl(screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); - - /* Check whether the value got written. 
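-    * If the execbuf was rejected, the zero-filled results buffer will
-    * simply fail to match expected_value and we report no access.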
*/ - void *results_map = brw_bo_map(NULL, results, MAP_READ); - if (results_map) { - success = *((uint32_t *)results_map + offset) == expected_value; - brw_bo_unmap(results); - } - -err_batch: - brw_bo_unreference(bo); -err_results: - brw_bo_unreference(results); -err: - return success; -} - -static bool -brw_detect_pipelined_so(struct brw_screen *screen) -{ - const struct intel_device_info *devinfo = &screen->devinfo; - - /* Supposedly, Broadwell just works. */ - if (devinfo->ver >= 8) - return true; - - if (devinfo->ver <= 6) - return false; - - /* See the big explanation about command parser versions below */ - if (screen->cmd_parser_version >= (devinfo->verx10 == 75 ? 7 : 2)) - return true; - - /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the - * statistics registers), and we already reset it to zero before using it. - */ - return brw_detect_pipelined_register(screen, - GFX7_SO_WRITE_OFFSET(0), - 0x1337d0d0, - false); -} - -/** - * Return array of MSAA modes supported by the hardware. The array is - * zero-terminated and sorted in decreasing order. - */ -const int* -brw_supported_msaa_modes(const struct brw_screen *screen) -{ - static const int gfx9_modes[] = {16, 8, 4, 2, 0, -1}; - static const int gfx8_modes[] = {8, 4, 2, 0, -1}; - static const int gfx7_modes[] = {8, 4, 0, -1}; - static const int gfx6_modes[] = {4, 0, -1}; - static const int gfx4_modes[] = {0, -1}; - - if (screen->devinfo.ver >= 9) { - return gfx9_modes; - } else if (screen->devinfo.ver >= 8) { - return gfx8_modes; - } else if (screen->devinfo.ver >= 7) { - return gfx7_modes; - } else if (screen->devinfo.ver == 6) { - return gfx6_modes; - } else { - return gfx4_modes; - } -} - -static unsigned -brw_loader_get_cap(const __DRIscreen *dri_screen, enum dri_loader_cap cap) -{ - if (dri_screen->dri2.loader && dri_screen->dri2.loader->base.version >= 4 && - dri_screen->dri2.loader->getCapability) - return dri_screen->dri2.loader->getCapability(dri_screen->loaderPrivate, cap); - - if (dri_screen->image.loader && dri_screen->image.loader->base.version >= 2 && - dri_screen->image.loader->getCapability) - return dri_screen->image.loader->getCapability(dri_screen->loaderPrivate, cap); - - return 0; -} - -static bool -brw_allowed_format(__DRIscreen *dri_screen, mesa_format format) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - - /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ - bool allow_rgba_ordering = brw_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING); - if (!allow_rgba_ordering && - (format == MESA_FORMAT_R8G8B8A8_UNORM || - format == MESA_FORMAT_R8G8B8X8_UNORM || - format == MESA_FORMAT_R8G8B8A8_SRGB || - format == MESA_FORMAT_R8G8B8X8_SRGB)) - return false; - - /* Shall we expose 10 bpc formats? */ - bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb10_configs"); - if (!allow_rgb10_configs && - (format == MESA_FORMAT_B10G10R10A2_UNORM || - format == MESA_FORMAT_B10G10R10X2_UNORM)) - return false; - - /* Shall we expose 565 formats? */ - bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb565_configs"); - if (!allow_rgb565_configs && format == MESA_FORMAT_B5G6R5_UNORM) - return false; - - /* Shall we expose fp16 formats? 
*/ - bool allow_fp16_configs = brw_loader_get_cap(dri_screen, DRI_LOADER_CAP_FP16); - if (!allow_fp16_configs && - (format == MESA_FORMAT_RGBA_FLOAT16 || - format == MESA_FORMAT_RGBX_FLOAT16)) - return false; - - return true; -} - -static __DRIconfig** -brw_screen_make_configs(__DRIscreen *dri_screen) -{ - static const mesa_format formats[] = { - MESA_FORMAT_B5G6R5_UNORM, - MESA_FORMAT_B8G8R8A8_UNORM, - MESA_FORMAT_B8G8R8X8_UNORM, - - MESA_FORMAT_B8G8R8A8_SRGB, - MESA_FORMAT_B8G8R8X8_SRGB, - - /* For 10 bpc, 30 bit depth framebuffers. */ - MESA_FORMAT_B10G10R10A2_UNORM, - MESA_FORMAT_B10G10R10X2_UNORM, - - MESA_FORMAT_RGBA_FLOAT16, - MESA_FORMAT_RGBX_FLOAT16, - - /* The 32-bit RGBA format must not precede the 32-bit BGRA format. - * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX - * server may disagree on which format the GLXFBConfig represents, - * resulting in swapped color channels. - * - * The problem, as of 2017-05-30: - * When matching a GLXFBConfig to a __DRIconfig, GLX ignores the channel - * order and chooses the first __DRIconfig with the expected channel - * sizes. Specifically, GLX compares the GLXFBConfig's and __DRIconfig's - * __DRI_ATTRIB_{CHANNEL}_SIZE but ignores __DRI_ATTRIB_{CHANNEL}_MASK. - * - * EGL does not suffer from this problem. It correctly compares the - * channel masks when matching EGLConfig to __DRIconfig. - */ - - /* Required by Android, for HAL_PIXEL_FORMAT_RGBA_8888. */ - MESA_FORMAT_R8G8B8A8_UNORM, - MESA_FORMAT_R8G8B8A8_SRGB, - - /* Required by Android, for HAL_PIXEL_FORMAT_RGBX_8888. */ - MESA_FORMAT_R8G8B8X8_UNORM, - MESA_FORMAT_R8G8B8X8_SRGB, - }; - - /* __DRI_ATTRIB_SWAP_COPY is not supported due to page flipping. */ - static const GLenum back_buffer_modes[] = { - __DRI_ATTRIB_SWAP_UNDEFINED, __DRI_ATTRIB_SWAP_NONE - }; - - static const uint8_t singlesample_samples[1] = {0}; - - struct brw_screen *screen = dri_screen->driverPrivate; - const struct intel_device_info *devinfo = &screen->devinfo; - uint8_t depth_bits[4], stencil_bits[4]; - __DRIconfig **configs = NULL; - - unsigned num_formats = ARRAY_SIZE(formats); - - /* Generate singlesample configs, each without accumulation buffer - * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR. - */ - for (unsigned i = 0; i < num_formats; i++) { - __DRIconfig **new_configs; - int num_depth_stencil_bits = 1; - - if (!brw_allowed_format(dri_screen, formats[i])) - continue; - - /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil - * buffer that has a different number of bits per pixel than the color - * buffer, gen >= 6 supports this. - */ - depth_bits[0] = 0; - stencil_bits[0] = 0; - - if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { - if (devinfo->ver >= 8) { - depth_bits[num_depth_stencil_bits] = 16; - stencil_bits[num_depth_stencil_bits] = 0; - num_depth_stencil_bits++; - } - if (devinfo->ver >= 6) { - depth_bits[num_depth_stencil_bits] = 24; - stencil_bits[num_depth_stencil_bits] = 8; - num_depth_stencil_bits++; - } - } else { - depth_bits[num_depth_stencil_bits] = 24; - stencil_bits[num_depth_stencil_bits] = 8; - num_depth_stencil_bits++; - } - - new_configs = driCreateConfigs(formats[i], - depth_bits, - stencil_bits, - num_depth_stencil_bits, - back_buffer_modes, 2, - singlesample_samples, 1, - false, false); - configs = driConcatConfigs(configs, new_configs); - } - - /* Generate the minimum possible set of configs that include an - * accumulation buffer. 
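-    * Each allowed format gets a single depth/stencil combination and a
-    * single sample count here, which keeps the config list small.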
- */ - for (unsigned i = 0; i < num_formats; i++) { - __DRIconfig **new_configs; - - if (!brw_allowed_format(dri_screen, formats[i])) - continue; - - if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { - if (devinfo->ver >= 8) { - depth_bits[0] = 16; - stencil_bits[0] = 0; - } else if (devinfo->ver >= 6) { - depth_bits[0] = 24; - stencil_bits[0] = 8; - } else { - depth_bits[0] = 0; - stencil_bits[0] = 0; - } - } else { - depth_bits[0] = 24; - stencil_bits[0] = 8; - } - - new_configs = driCreateConfigs(formats[i], - depth_bits, stencil_bits, 1, - back_buffer_modes, 1, - singlesample_samples, 1, - true, false); - configs = driConcatConfigs(configs, new_configs); - } - - /* Generate multisample configs. - * - * This loop breaks early, and hence is a no-op, on gen < 6. - * - * Multisample configs must follow the singlesample configs in order to - * work around an X server bug present in 1.12. The X server chooses to - * associate the first listed RGBA888-Z24S8 config, regardless of its - * sample count, with the 32-bit depth visual used for compositing. - * - * Only doublebuffer configs with GLX_SWAP_UNDEFINED_OML behavior are - * supported. Singlebuffer configs are not supported because no one wants - * them. - */ - for (unsigned i = 0; i < num_formats; i++) { - if (devinfo->ver < 6) - break; - - if (!brw_allowed_format(dri_screen, formats[i])) - continue; - - __DRIconfig **new_configs; - const int num_depth_stencil_bits = 2; - int num_msaa_modes = 0; - const uint8_t *multisample_samples = NULL; - - depth_bits[0] = 0; - stencil_bits[0] = 0; - - if (formats[i] == MESA_FORMAT_B5G6R5_UNORM && devinfo->ver >= 8) { - depth_bits[1] = 16; - stencil_bits[1] = 0; - } else { - depth_bits[1] = 24; - stencil_bits[1] = 8; - } - - if (devinfo->ver >= 9) { - static const uint8_t multisample_samples_gfx9[] = {2, 4, 8, 16}; - multisample_samples = multisample_samples_gfx9; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx9); - } else if (devinfo->ver == 8) { - static const uint8_t multisample_samples_gfx8[] = {2, 4, 8}; - multisample_samples = multisample_samples_gfx8; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx8); - } else if (devinfo->ver == 7) { - static const uint8_t multisample_samples_gfx7[] = {4, 8}; - multisample_samples = multisample_samples_gfx7; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx7); - } else if (devinfo->ver == 6) { - static const uint8_t multisample_samples_gfx6[] = {4}; - multisample_samples = multisample_samples_gfx6; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx6); - } - - new_configs = driCreateConfigs(formats[i], - depth_bits, - stencil_bits, - num_depth_stencil_bits, - back_buffer_modes, 1, - multisample_samples, - num_msaa_modes, - false, false); - configs = driConcatConfigs(configs, new_configs); - } - - if (configs == NULL) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, - __LINE__); - return NULL; - } - - return configs; -} - -static void -set_max_gl_versions(struct brw_screen *screen) -{ - __DRIscreen *dri_screen = screen->driScrnPriv; - const bool has_astc = screen->devinfo.ver >= 9; - - switch (screen->devinfo.ver) { - case 11: - case 10: - case 9: - case 8: - dri_screen->max_gl_core_version = 46; - dri_screen->max_gl_compat_version = 30; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = has_astc ? 
32 : 31; - break; - case 7: - dri_screen->max_gl_core_version = 33; - if (can_do_pipelined_register_writes(screen)) { - dri_screen->max_gl_core_version = 42; - if (screen->devinfo.platform == INTEL_PLATFORM_HSW && can_do_compute_dispatch(screen)) - dri_screen->max_gl_core_version = 43; - if (screen->devinfo.platform == INTEL_PLATFORM_HSW && can_do_mi_math_and_lrr(screen)) - dri_screen->max_gl_core_version = 45; - } - dri_screen->max_gl_compat_version = 30; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = screen->devinfo.platform == INTEL_PLATFORM_HSW ? 31 : 30; - break; - case 6: - dri_screen->max_gl_core_version = 33; - dri_screen->max_gl_compat_version = 30; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = 30; - break; - case 5: - case 4: - dri_screen->max_gl_core_version = 0; - dri_screen->max_gl_compat_version = 21; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = 20; - break; - default: - unreachable("unrecognized brw_screen::gen"); - } - - /* OpenGL 3.3+ requires GL_ARB_blend_func_extended. Don't advertise those - * versions if driconf disables the extension. - */ - if (driQueryOptionb(&screen->optionCache, "disable_blend_func_extended")) { - dri_screen->max_gl_core_version = - MIN2(32, dri_screen->max_gl_core_version); - dri_screen->max_gl_compat_version = - MIN2(32, dri_screen->max_gl_compat_version); - } - - /* Using the `allow_higher_compat_version` option during context creation - * means that an application that doesn't request a specific version can be - * given a version higher than 3.0. However, an application still cannot - * request a higher version. For that to work, max_gl_compat_version must - * be set. - */ - if (dri_screen->max_gl_compat_version < dri_screen->max_gl_core_version) { - if (driQueryOptionb(&screen->optionCache, "allow_higher_compat_version")) - dri_screen->max_gl_compat_version = dri_screen->max_gl_core_version; - } -} - -static void -shader_debug_log_mesa(void *data, unsigned *msg_id, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - va_list args; - - va_start(args, fmt); - _mesa_gl_vdebugf(&brw->ctx, msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); - va_end(args); -} - -static void -shader_perf_log_mesa(void *data, unsigned *msg_id, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - - va_list args; - va_start(args, fmt); - - if (INTEL_DEBUG(DEBUG_PERF)) { - va_list args_copy; - va_copy(args_copy, args); - vfprintf(stderr, fmt, args_copy); - va_end(args_copy); - } - - if (brw->perf_debug) { - _mesa_gl_vdebugf(&brw->ctx, msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_PERFORMANCE, - MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); - } - va_end(args); -} - -/** - * This is the driver specific part of the createNewScreen entry point. - * Called when using DRI2. - * - * \return the struct gl_config supported by this driver - */ -static const -__DRIconfig **brw_init_screen(__DRIscreen *dri_screen) -{ - struct brw_screen *screen; - - util_cpu_detect(); - - if (dri_screen->image.loader) { - } else if (dri_screen->dri2.loader->base.version <= 2 || - dri_screen->dri2.loader->getBuffersWithFormat == NULL) { - fprintf(stderr, - "\nERROR! DRI2 loader with getBuffersWithFormat() " - "support required\n"); - return NULL; - } - - /* Allocate the private area */ - screen = rzalloc(NULL, struct brw_screen); - if (!screen) { - fprintf(stderr, "\nERROR! 
Allocating private area failed\n"); - return NULL; - } - /* parse information in __driConfigOptions */ - driOptionCache options; - memset(&options, 0, sizeof(options)); - - driParseOptionInfo(&options, brw_driconf, ARRAY_SIZE(brw_driconf)); - driParseConfigFiles(&screen->optionCache, &options, dri_screen->myNum, - "i965", NULL, NULL, NULL, 0, NULL, 0); - driDestroyOptionCache(&options); - - screen->driScrnPriv = dri_screen; - dri_screen->driverPrivate = (void *) screen; - - if (!intel_get_device_info_from_fd(dri_screen->fd, &screen->devinfo)) - return NULL; - - const struct intel_device_info *devinfo = &screen->devinfo; - screen->deviceID = devinfo->chipset_id; - - if (devinfo->ver >= 12) { - fprintf(stderr, "gfx12 and newer are not supported on i965\n"); - return NULL; - } - - if (!brw_init_bufmgr(screen)) - return NULL; - - brw_process_intel_debug_variable(); - - if (INTEL_DEBUG(DEBUG_SHADER_TIME) && devinfo->ver < 7) { - fprintf(stderr, - "shader_time debugging requires gfx7 (Ivybridge) or better.\n"); - intel_debug &= ~DEBUG_SHADER_TIME; - } - - if (brw_get_integer(screen, I915_PARAM_MMAP_GTT_VERSION) >= 1) { - /* Theoretically unlimited! At least for individual objects... - * - * Currently the entire (global) address space for all GTT maps is - * limited to 64bits. That is, all objects on the system that are - * setup for GTT mmapping must fit within 64bits. An attempt to use - * one that exceeds the limit will fail in brw_bo_map_gtt(). - * - * Long before we hit that limit, we will be practically limited by - * the fact that any single object must fit in physical memory (RAM). - * The upper limit on the CPU's address space is currently 48bits - * (Skylake), of which only 39bits can be physical memory. (The GPU - * itself also has a 48bit addressable virtual space.) We can fit over - * 32 million objects of the current maximum allocable size before - * running out of mmap space. - */ - screen->max_gtt_map_object_size = UINT64_MAX; - } else { - /* Estimate the size of the mappable aperture into the GTT. There's an - * ioctl to get the whole GTT size, but not one to get the mappable subset. - * It turns out it's basically always 256MB, though some ancient hardware - * was smaller. - */ - uint32_t gtt_size = 256 * 1024 * 1024; - - /* We don't want to map two objects such that a memcpy between them would - * just fault one mapping in and then the other over and over forever. So - * we would need to divide the GTT size by 2. Additionally, some GTT is - * taken up by things like the framebuffer and the ringbuffer and such, so - * be more conservative. - */ - screen->max_gtt_map_object_size = gtt_size / 4; - } - - screen->aperture_threshold = devinfo->aperture_bytes * 3 / 4; - - screen->hw_has_timestamp = brw_detect_timestamp(screen); - - isl_device_init(&screen->isl_dev, &screen->devinfo); - - /* Gfx7-7.5 kernel requirements / command parser saga: - * - * - pre-v3.16: - * Haswell and Baytrail cannot use any privileged batchbuffer features. - * - * Ivybridge has aliasing PPGTT on by default, which accidentally marks - * all batches secure, allowing them to use any feature with no checking. - * This is effectively equivalent to a command parser version of - * \infinity - everything is possible. - * - * The command parser does not exist, and querying the version will - * return -EINVAL. - * - * - v3.16: - * The kernel enables the command parser by default, for systems with - * aliasing PPGTT enabled (Ivybridge and Haswell).
However, the - * hardware checker is still enabled, so Haswell and Baytrail cannot - * do anything. - * - * Ivybridge goes from "everything is possible" to "only what the - * command parser allows" (if the user boots with i915.cmd_parser=0, - * then everything is possible again). We can only safely use features - * allowed by the supported command parser version. - * - * Annoyingly, I915_PARAM_CMD_PARSER_VERSION reports the static version - * implemented by the kernel, even if it's turned off. So, checking - * for version > 0 does not mean that you can write registers. We have - * to try it and see. The version does, however, indicate the age of - * the kernel. - * - * Instead of matching the hardware checker's behavior of converting - * privileged commands to MI_NOOP, it makes execbuf2 start returning - * -EINVAL, making it dangerous to try and use privileged features. - * - * Effective command parser versions: - * - Haswell: 0 (reporting 1, writes don't work) - * - Baytrail: 0 (reporting 1, writes don't work) - * - Ivybridge: 1 (enabled) or infinite (disabled) - * - * - v3.17: - * Baytrail aliasing PPGTT is enabled, making it like Ivybridge: - * effectively version 1 (enabled) or infinite (disabled). - * - * - v3.19: f1f55cc0556031c8ee3fe99dae7251e78b9b653b - * Command parser v2 supports predicate writes. - * - * - Haswell: 0 (reporting 1, writes don't work) - * - Baytrail: 2 (enabled) or infinite (disabled) - * - Ivybridge: 2 (enabled) or infinite (disabled) - * - * So version >= 2 is enough to know that Ivybridge and Baytrail - * will work. Haswell still can't do anything. - * - * - v4.0: Version 3 happened. Largely not relevant. - * - * - v4.1: 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b - * L3 config registers are properly saved and restored as part - * of the hardware context. We can approximately detect this point - * in time by checking if I915_PARAM_REVISION is recognized - it - * landed in a later commit, but in the same release cycle. - * - * - v4.2: 245054a1fe33c06ad233e0d58a27ec7b64db9284 - * Command parser finally gains secure batch promotion. On Haswell, - * the hardware checker gets disabled, which finally allows it to do - * privileged commands. - * - * I915_PARAM_CMD_PARSER_VERSION reports 3. Effective versions: - * - Haswell: 3 (enabled) or 0 (disabled) - * - Baytrail: 3 (enabled) or infinite (disabled) - * - Ivybridge: 3 (enabled) or infinite (disabled) - * - * Unfortunately, detecting this point in time is tricky, because - * no version bump happened when this important change occurred. - * On Haswell, if we can write any register, then the kernel is at - * least this new, and we can start trusting the version number. - * - * - v4.4: 2bbe6bbb0dc94fd4ce287bdac9e1bd184e23057b and - * Command parser reaches version 4, allowing access to Haswell - * atomic scratch and chicken3 registers. If version >= 4, we know - * the kernel is new enough to support privileged features on all - * hardware. However, the user might have disabled it...and the - * kernel will still report version 4. So we still have to guess - * and check. - * - * - v4.4: 7b9748cb513a6bef4af87b79f0da3ff7e8b56cd8 - * Command parser v5 whitelists indirect compute shader dispatch - * registers, needed for OpenGL 4.3 and later. - * - * - v4.8: - * Command parser v7 lets us use MI_MATH on Haswell. - * - * Additionally, the kernel begins reporting version 0 when - * the command parser is disabled, allowing us to skip the - * guess-and-check step on Haswell. 
Unfortunately, this also - * means that we can no longer use it as an indicator of the - * age of the kernel. - */ - if (brw_get_param(screen, I915_PARAM_CMD_PARSER_VERSION, - &screen->cmd_parser_version) < 0) { - /* Command parser does not exist - getparam is unrecognized */ - screen->cmd_parser_version = 0; - } - - /* Kernel 4.13 required for exec object capture */ - if (brw_get_boolean(screen, I915_PARAM_HAS_EXEC_CAPTURE)) { - screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE; - } - - if (brw_get_boolean(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST)) { - screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST; - } - - if (!brw_detect_pipelined_so(screen)) { - /* We can't do anything, so the effective version is 0. */ - screen->cmd_parser_version = 0; - } else { - screen->kernel_features |= KERNEL_ALLOWS_SOL_OFFSET_WRITES; - } - - if (devinfo->ver >= 8 || screen->cmd_parser_version >= 2) - screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES; - - /* Haswell requires command parser version 4 in order to have L3 - * atomic scratch1 and chicken3 bits - */ - if (devinfo->verx10 == 75 && screen->cmd_parser_version >= 4) { - screen->kernel_features |= - KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3; - } - - /* Haswell requires command parser version 6 in order to write to the - * MI_MATH GPR registers, and version 7 in order to use - * MI_LOAD_REGISTER_REG (which all users of MI_MATH use). - */ - if (devinfo->ver >= 8 || - (devinfo->verx10 == 75 && screen->cmd_parser_version >= 7)) { - screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR; - } - - /* Gfx7 needs at least command parser version 5 to support compute */ - if (devinfo->ver >= 8 || screen->cmd_parser_version >= 5) - screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH; - - if (brw_get_boolean(screen, I915_PARAM_HAS_CONTEXT_ISOLATION)) - screen->kernel_features |= KERNEL_ALLOWS_CONTEXT_ISOLATION; - - const char *force_msaa = getenv("INTEL_FORCE_MSAA"); - if (force_msaa) { - screen->winsys_msaa_samples_override = - brw_quantize_num_samples(screen, atoi(force_msaa)); - printf("Forcing winsys sample count to %d\n", - screen->winsys_msaa_samples_override); - } else { - screen->winsys_msaa_samples_override = -1; - } - - set_max_gl_versions(screen); - - /* Notification of GPU resets requires hardware contexts and a kernel new - * enough to support DRM_IOCTL_I915_GET_RESET_STATS. If the ioctl is - * supported, calling it with a context of 0 will either generate EPERM or - * no error. If the ioctl is not supported, it always generates EINVAL. - * Use this to determine whether to advertise the __DRI2_ROBUSTNESS - * extension to the loader. - * - * Don't even try on pre-Gfx6, since we don't attempt to use contexts there. - */ - if (devinfo->ver >= 6) { - struct drm_i915_reset_stats stats; - memset(&stats, 0, sizeof(stats)); - - const int ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats); - - screen->has_context_reset_notification = - (ret != -1 || errno != EINVAL); - } - - dri_screen->extensions = !screen->has_context_reset_notification - ? screenExtensions : brwRobustScreenExtensions; - - screen->compiler = brw_compiler_create(screen, devinfo); - screen->compiler->shader_debug_log = shader_debug_log_mesa; - screen->compiler->shader_perf_log = shader_perf_log_mesa; - - /* Changing the meaning of constant buffer pointers from a dynamic state - * offset to an absolute address is only safe if the kernel isolates other - * contexts from our changes.
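For reference, the parser-version probe that all of the feature bits above key off of reduces to a single GETPARAM ioctl. A minimal standalone sketch against the kernel's i915 uAPI (assuming brw_get_param() wraps something close to this; error handling trimmed):

   #include <sys/ioctl.h>
   #include <xf86drm.h>
   #include "drm-uapi/i915_drm.h"

   /* Query the command parser version; 0 means "no parser" (pre-v3.16). */
   static int
   get_cmd_parser_version(int fd)
   {
      int value = 0;
      struct drm_i915_getparam gp = {
         .param = I915_PARAM_CMD_PARSER_VERSION,
         .value = &value,
      };

      if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0)
         return 0; /* getparam unrecognized: parser does not exist */

      return value;
   }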
- */ - screen->compiler->constant_buffer_0_is_relative = devinfo->ver < 8 || - !(screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION); - - screen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].PositionAlwaysInvariant = driQueryOptionb(&screen->optionCache, "vs_position_always_invariant"); - screen->compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].PositionAlwaysPrecise = driQueryOptionb(&screen->optionCache, "vs_position_always_precise"); - - screen->compiler->supports_pull_constants = true; - screen->compiler->compact_params = true; - screen->compiler->lower_variable_group_size = true; - - screen->has_exec_fence = - brw_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE); - - brw_screen_init_surface_formats(screen); - - if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) { - unsigned int caps = brw_get_integer(screen, I915_PARAM_HAS_SCHEDULER); - if (caps) { - fprintf(stderr, "Kernel scheduler detected: %08x\n", caps); - if (caps & I915_SCHEDULER_CAP_PRIORITY) - fprintf(stderr, " - User priority sorting enabled\n"); - if (caps & I915_SCHEDULER_CAP_PREEMPTION) - fprintf(stderr, " - Preemption enabled\n"); - } - } - - brw_disk_cache_init(screen); - - return (const __DRIconfig**) brw_screen_make_configs(dri_screen); -} - -struct brw_buffer { - __DRIbuffer base; - struct brw_bo *bo; -}; - -static __DRIbuffer * -brw_allocate_buffer(__DRIscreen *dri_screen, - unsigned attachment, unsigned format, - int width, int height) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - - assert(attachment == __DRI_BUFFER_FRONT_LEFT || - attachment == __DRI_BUFFER_BACK_LEFT); - - struct brw_buffer *buffer = calloc(1, sizeof *buffer); - if (buffer == NULL) - return NULL; - - /* The front and back buffers are color buffers, which are X tiled. GFX9+ - * supports Y tiled and compressed buffers, but there is no way to plumb that - * through to here. 
*/ - uint32_t pitch; - int cpp = format / 8; - buffer->bo = brw_bo_alloc_tiled_2d(screen->bufmgr, - __func__, - width, - height, - cpp, - BRW_MEMZONE_OTHER, - I915_TILING_X, &pitch, - BO_ALLOC_BUSY); - - if (buffer->bo == NULL) { - free(buffer); - return NULL; - } - - brw_bo_flink(buffer->bo, &buffer->base.name); - - buffer->base.attachment = attachment; - buffer->base.cpp = cpp; - buffer->base.pitch = pitch; - - return &buffer->base; -} - -static void -brw_release_buffer(UNUSED __DRIscreen *dri_screen, __DRIbuffer *_buffer) -{ - struct brw_buffer *buffer = (struct brw_buffer *) _buffer; - - brw_bo_unreference(buffer->bo); - free(buffer); -} - -static const struct __DriverAPIRec brw_driver_api = { - .InitScreen = brw_init_screen, - .DestroyScreen = brw_destroy_screen, - .CreateContext = brw_create_context, - .DestroyContext = brw_destroy_context, - .CreateBuffer = brw_create_buffer, - .DestroyBuffer = brw_destroy_buffer, - .MakeCurrent = brw_make_current, - .UnbindContext = brw_unbind_context, - .AllocateBuffer = brw_allocate_buffer, - .ReleaseBuffer = brw_release_buffer -}; - -static const struct __DRIDriverVtableExtensionRec brw_vtable = { - .base = { __DRI_DRIVER_VTABLE, 1 }, - .vtable = &brw_driver_api, -}; - -static const __DRIextension *brw_driver_extensions[] = { - &driCoreExtension.base, - &driImageDriverExtension.base, - &driDRI2Extension.base, - &brw_vtable.base, - &brw_config_options.base, - NULL -}; - -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) -{ - globalDriverAPI = &brw_driver_api; - - return brw_driver_extensions; -} diff --git a/src/mesa/drivers/dri/i965/brw_screen.h b/src/mesa/drivers/dri/i965/brw_screen.h deleted file mode 100644 index b68c2ac..0000000 --- a/src/mesa/drivers/dri/i965/brw_screen.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _INTEL_INIT_H_ -#define _INTEL_INIT_H_ - -#include <stdbool.h> -#include <sys/time.h> - -#include <GL/internal/dri_interface.h> - -#include "isl/isl.h" -#include "dri_util.h" -#include "brw_bufmgr.h" -#include "dev/intel_device_info.h" -#include "drm-uapi/i915_drm.h" -#include "util/xmlconfig.h" - -#include "isl/isl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_screen -{ - int deviceID; - struct intel_device_info devinfo; - - __DRIscreen *driScrnPriv; - - uint64_t max_gtt_map_object_size; - - /** Bytes of aperture usage beyond which execbuf is likely to fail.
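The PUBLIC __driDriverGetExtensions_i965() entry point above is what DRI loaders resolve by name at runtime; roughly as in the sketch below (editor's illustration of the generic loader side, not code from this tree; the library name and error handling are simplified):

   #include <dlfcn.h>

   typedef struct __DRIextensionRec __DRIextension; /* from dri_interface.h */
   typedef const __DRIextension **(*get_extensions_fn)(void);

   /* Sketch: how a loader binds a driver's extension list by symbol name. */
   static const __DRIextension **
   load_i965_extensions(void)
   {
      void *handle = dlopen("i965_dri.so", RTLD_NOW | RTLD_GLOBAL);
      if (!handle)
         return NULL;

      get_extensions_fn get_extensions =
         (get_extensions_fn) dlsym(handle, "__driDriverGetExtensions_i965");

      return get_extensions ? get_extensions() : NULL;
   }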
*/ - uint64_t aperture_threshold; - - /** DRM fd associated with this screen. Not owned by this object. Do not close. */ - int fd; - - bool has_exec_fence; /**< I915_PARAM_HAS_EXEC_FENCE */ - - int hw_has_timestamp; - - struct isl_device isl_dev; - - /** - * Does the kernel support context reset notifications? - */ - bool has_context_reset_notification; - - /** - * Does the kernel support features such as pipelined register access to - * specific registers? - */ - unsigned kernel_features; -#define KERNEL_ALLOWS_SOL_OFFSET_WRITES (1<<0) -#define KERNEL_ALLOWS_PREDICATE_WRITES (1<<1) -#define KERNEL_ALLOWS_MI_MATH_AND_LRR (1<<2) -#define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3) -#define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4) -#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5) -#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6) -#define KERNEL_ALLOWS_CONTEXT_ISOLATION (1<<7) - - struct brw_bufmgr *bufmgr; - - /** - * A unique ID for shader programs. - */ - unsigned program_id; - - int winsys_msaa_samples_override; - - struct brw_compiler *compiler; - - /** - * Configuration cache with default values for all contexts - */ - driOptionCache optionCache; - - /** - * Version of the command parser reported by the - * I915_PARAM_CMD_PARSER_VERSION parameter - */ - int cmd_parser_version; - - bool mesa_format_supports_texture[MESA_FORMAT_COUNT]; - bool mesa_format_supports_render[MESA_FORMAT_COUNT]; - enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT]; - - struct disk_cache *disk_cache; -}; - -extern void brw_destroy_context(__DRIcontext *driContextPriv); - -extern GLboolean brw_unbind_context(__DRIcontext *driContextPriv); - -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void); -extern const __DRI2fenceExtension brwFenceExtension; - -extern GLboolean -brw_make_current(__DRIcontext *driContextPriv, - __DRIdrawable *driDrawPriv, - __DRIdrawable *driReadPriv); - -double get_time(void); - -const int* -brw_supported_msaa_modes(const struct brw_screen *screen); - -static inline bool -can_do_pipelined_register_writes(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_SOL_OFFSET_WRITES; -} - -static inline bool -can_do_hsw_l3_atomics(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3; -} - -static inline bool -can_do_mi_math_and_lrr(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_MI_MATH_AND_LRR; -} - -static inline bool -can_do_compute_dispatch(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_COMPUTE_DISPATCH; -} - -static inline bool -can_do_predicate_writes(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_PREDICATE_WRITES; -} - -static inline bool -can_do_exec_capture(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_EXEC_CAPTURE; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c deleted file mode 100644 index 8905033..0000000 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
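These one-line helpers are how the command-parser feature bits established in brw_screen.c get consumed around the driver; a hypothetical caller, for illustration (can_use_indirect_compute() is not a real function in this tree):

   /* Hypothetical consumer of the feature bits above (names from this header). */
   static bool
   can_use_indirect_compute(const struct brw_screen *screen)
   {
      /* Indirect dispatch needs both pipelined register writes and compute
       * dispatch whitelisting (command parser v5+ on Gfx7; see brw_screen.c). */
      return can_do_pipelined_register_writes(screen) &&
             can_do_compute_dispatch(screen);
   }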
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "compiler/nir/nir.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/enums.h" -#include "main/fbobject.h" -#include "main/state.h" - -#include "brw_batch.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_state.h" -#include "compiler/brw_eu.h" - -#include "util/ralloc.h" - -static void -compile_sf_prog(struct brw_context *brw, struct brw_sf_prog_key *key) -{ - const unsigned *program; - void *mem_ctx; - unsigned program_size; - - mem_ctx = ralloc_context(NULL); - - struct brw_sf_prog_data prog_data; - program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data, - &brw->vue_map_geom_out, &program_size); - - brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->sf.prog_offset, &brw->sf.prog_data); - ralloc_free(mem_ctx); -} - -/* Calculate interpolants for triangle and line rasterization. - */ -void -brw_upload_sf_prog(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_sf_prog_key key; - - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_HINT | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_PROGRAM | - _NEW_TRANSFORM, - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_VUE_MAP_GEOM_OUT)) - return; - - /* _NEW_BUFFERS */ - bool flip_y = ctx->DrawBuffer->FlipY; - - memset(&key, 0, sizeof(key)); - - /* Populate the key, noting state dependencies: - */ - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - key.attrs = brw->vue_map_geom_out.slots_valid; - - /* BRW_NEW_REDUCED_PRIMITIVE */ - switch (brw->reduced_primitive) { - case GL_TRIANGLES: - /* NOTE: We just use the edgeflag attribute as an indicator that - * unfilled triangles are active. We don't actually do the - * edgeflag testing here, it is already done in the clip - * program. 
- */ - if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE)) - key.primitive = BRW_SF_PRIM_UNFILLED_TRIS; - else - key.primitive = BRW_SF_PRIM_TRIANGLES; - break; - case GL_LINES: - key.primitive = BRW_SF_PRIM_LINES; - break; - case GL_POINTS: - key.primitive = BRW_SF_PRIM_POINTS; - break; - } - - /* _NEW_TRANSFORM */ - key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); - - /* _NEW_POINT */ - key.do_point_sprite = ctx->Point.PointSprite; - if (key.do_point_sprite) { - key.point_sprite_coord_replace = ctx->Point.CoordReplace & 0xff; - } - if (brw->programs[MESA_SHADER_FRAGMENT]->info.inputs_read & - BITFIELD64_BIT(VARYING_SLOT_PNTC)) { - key.do_point_coord = 1; - } - - /* - * Window coordinates in a FBO are inverted, which means point - * sprite origin must be inverted, too. - */ - if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) - key.sprite_origin_lower_left = true; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - if (wm_prog_data) { - key.contains_flat_varying = wm_prog_data->contains_flat_varying; - - STATIC_ASSERT(sizeof(key.interp_mode) == - sizeof(wm_prog_data->interp_mode)); - memcpy(key.interp_mode, wm_prog_data->interp_mode, - sizeof(key.interp_mode)); - } - - /* _NEW_LIGHT | _NEW_PROGRAM */ - key.do_twoside_color = _mesa_vertex_program_two_side_enabled(ctx); - - /* _NEW_POLYGON */ - if (key.do_twoside_color) { - /* If we're rendering to a FBO, we have to invert the polygon - * face orientation, just as we invert the viewport in - * sf_unit_create_from_key(). - */ - key.frontface_ccw = brw->polygon_front_bit != flip_y; - } - - if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, &key, sizeof(key), - &brw->sf.prog_offset, &brw->sf.prog_data, true)) { - compile_sf_prog( brw, &key ); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c deleted file mode 100644 index 9901746..0000000 --- a/src/mesa/drivers/dri/i965/brw_state.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
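One detail worth noting in brw_upload_sf_prog() above: the memset() of the key is load-bearing, because program-cache lookups compare keys as raw bytes. A standalone sketch of the pitfall (hypothetical example_key type, not driver code):

   #include <stdbool.h>
   #include <stdint.h>
   #include <string.h>

   /* Hypothetical key type: padding between fields is part of the raw bytes. */
   struct example_key {
      uint8_t  primitive;
      /* 3 bytes of padding here on most ABIs */
      uint32_t attrs;
   };

   static bool
   keys_match(const struct example_key *a, const struct example_key *b)
   {
      /* Byte-wise comparison, as a cache keyed on raw bytes would do: without
       * memset(&key, 0, sizeof(key)) before filling in the fields, garbage in
       * the padding makes logically equal keys compare unequal and defeats
       * the cache. */
      return memcmp(a, b, sizeof(*a)) == 0;
   }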
- */ - -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "main/dd.h" - -#include "brw_screen.h" -#include "brw_context.h" -#include "brw_defines.h" - -int -brw_translate_shadow_compare_func(GLenum func) -{ - /* GL specifies the result of shadow comparisons as: - * 1 if ref <op> texel, - * 0 otherwise. - * - * The hardware does: - * 0 if texel <op> ref, - * 1 otherwise. - * - * So, these look a bit strange because there's both a negation - * and swapping of the arguments involved. For example, GL_LESS - * (pass when ref < texel) maps to BRW_COMPAREFUNCTION_LEQUAL: the - * hardware yields 0 when texel <= ref, which is exactly when the - * GL comparison fails. - */ - switch (func) { - case GL_NEVER: - return BRW_COMPAREFUNCTION_ALWAYS; - case GL_LESS: - return BRW_COMPAREFUNCTION_LEQUAL; - case GL_LEQUAL: - return BRW_COMPAREFUNCTION_LESS; - case GL_GREATER: - return BRW_COMPAREFUNCTION_GEQUAL; - case GL_GEQUAL: - return BRW_COMPAREFUNCTION_GREATER; - case GL_NOTEQUAL: - return BRW_COMPAREFUNCTION_EQUAL; - case GL_EQUAL: - return BRW_COMPAREFUNCTION_NOTEQUAL; - case GL_ALWAYS: - return BRW_COMPAREFUNCTION_NEVER; - } - - unreachable("Invalid shadow comparison function."); -} - -int -brw_translate_compare_func(GLenum func) -{ - switch (func) { - case GL_NEVER: - return BRW_COMPAREFUNCTION_NEVER; - case GL_LESS: - return BRW_COMPAREFUNCTION_LESS; - case GL_LEQUAL: - return BRW_COMPAREFUNCTION_LEQUAL; - case GL_GREATER: - return BRW_COMPAREFUNCTION_GREATER; - case GL_GEQUAL: - return BRW_COMPAREFUNCTION_GEQUAL; - case GL_NOTEQUAL: - return BRW_COMPAREFUNCTION_NOTEQUAL; - case GL_EQUAL: - return BRW_COMPAREFUNCTION_EQUAL; - case GL_ALWAYS: - return BRW_COMPAREFUNCTION_ALWAYS; - } - - unreachable("Invalid comparison function."); -} - -int -brw_translate_stencil_op(GLenum op) -{ - switch (op) { - case GL_KEEP: - return BRW_STENCILOP_KEEP; - case GL_ZERO: - return BRW_STENCILOP_ZERO; - case GL_REPLACE: - return BRW_STENCILOP_REPLACE; - case GL_INCR: - return BRW_STENCILOP_INCRSAT; - case GL_DECR: - return BRW_STENCILOP_DECRSAT; - case GL_INCR_WRAP: - return BRW_STENCILOP_INCR; - case GL_DECR_WRAP: - return BRW_STENCILOP_DECR; - case GL_INVERT: - return BRW_STENCILOP_INVERT; - default: - return BRW_STENCILOP_ZERO; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h deleted file mode 100644 index 17d9e54..0000000 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STATE_H -#define BRW_STATE_H - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum intel_msaa_layout; - -extern const struct brw_tracked_state brw_blend_constant_color; -extern const struct brw_tracked_state brw_clip_unit; -extern const struct brw_tracked_state brw_vs_pull_constants; -extern const struct brw_tracked_state brw_tcs_pull_constants; -extern const struct brw_tracked_state brw_tes_pull_constants; -extern const struct brw_tracked_state brw_gs_pull_constants; -extern const struct brw_tracked_state brw_wm_pull_constants; -extern const struct brw_tracked_state brw_cs_pull_constants; -extern const struct brw_tracked_state brw_constant_buffer; -extern const struct brw_tracked_state brw_curbe_offsets; -extern const struct brw_tracked_state brw_binding_table_pointers; -extern const struct brw_tracked_state brw_depthbuffer; -extern const struct brw_tracked_state brw_recalculate_urb_fence; -extern const struct brw_tracked_state brw_sf_vp; -extern const struct brw_tracked_state brw_cs_texture_surfaces; -extern const struct brw_tracked_state brw_vs_ubo_surfaces; -extern const struct brw_tracked_state brw_vs_image_surfaces; -extern const struct brw_tracked_state brw_tcs_ubo_surfaces; -extern const struct brw_tracked_state brw_tcs_image_surfaces; -extern const struct brw_tracked_state brw_tes_ubo_surfaces; -extern const struct brw_tracked_state brw_tes_image_surfaces; -extern const struct brw_tracked_state brw_gs_ubo_surfaces; -extern const struct brw_tracked_state brw_gs_image_surfaces; -extern const struct brw_tracked_state brw_renderbuffer_surfaces; -extern const struct brw_tracked_state brw_renderbuffer_read_surfaces; -extern const struct brw_tracked_state brw_texture_surfaces; -extern const struct brw_tracked_state brw_wm_binding_table; -extern const struct brw_tracked_state brw_gs_binding_table; -extern const struct brw_tracked_state brw_tes_binding_table; -extern const struct brw_tracked_state brw_tcs_binding_table; -extern const struct brw_tracked_state brw_vs_binding_table; -extern const struct brw_tracked_state brw_wm_ubo_surfaces; -extern const struct brw_tracked_state brw_wm_image_surfaces; -extern const struct brw_tracked_state brw_cs_ubo_surfaces; -extern const struct brw_tracked_state brw_cs_image_surfaces; - -extern const struct brw_tracked_state brw_psp_urb_cbs; - -extern const struct brw_tracked_state brw_indices; -extern const struct brw_tracked_state brw_index_buffer; -extern const struct brw_tracked_state gfx7_cs_push_constants; -extern const struct brw_tracked_state gfx6_binding_table_pointers; -extern const struct brw_tracked_state gfx6_gs_binding_table; -extern const struct brw_tracked_state gfx6_renderbuffer_surfaces; -extern const struct brw_tracked_state gfx6_sampler_state; -extern const struct brw_tracked_state gfx6_sol_surface; -extern const struct brw_tracked_state gfx6_sf_vp; -extern const struct brw_tracked_state gfx6_urb; -extern const struct brw_tracked_state gfx7_l3_state; -extern const struct brw_tracked_state gfx7_push_constant_space; -extern const struct brw_tracked_state gfx7_urb; -extern const struct brw_tracked_state gfx8_pma_fix; -extern const struct brw_tracked_state brw_cs_work_groups_surface; - -void gfx4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx45_emit_raw_pipe_control(struct brw_context *brw, uint32_t 
flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); - -static inline bool -brw_state_dirty(const struct brw_context *brw, - GLuint mesa_flags, uint64_t brw_flags) -{ - return ((brw->NewGLState & mesa_flags) | - (brw->ctx.NewDriverState & brw_flags)) != 0; -} - -/* brw_binding_tables.c */ -void brw_upload_binding_table(struct brw_context *brw, - uint32_t packet_name, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state); - -/* brw_misc_state.c */ -void brw_upload_invariant_state(struct brw_context *brw); -uint32_t -brw_depthbuffer_format(struct brw_context *brw); - -/* gfx8_depth_state.c */ -void gfx8_write_pma_stall_bits(struct brw_context *brw, - uint32_t pma_stall_bits); - -/* brw_disk_cache.c */ -void brw_disk_cache_init(struct brw_screen *screen); -bool brw_disk_cache_upload_program(struct brw_context *brw, - gl_shader_stage stage); -void brw_disk_cache_write_compute_program(struct brw_context *brw); -void brw_disk_cache_write_render_programs(struct brw_context *brw); - -/*********************************************************************** - * brw_state_upload.c - */ -void brw_upload_render_state(struct brw_context *brw); -void brw_render_state_finished(struct brw_context *brw); -void brw_upload_compute_state(struct brw_context *brw); -void brw_compute_state_finished(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); -void brw_emit_select_pipeline(struct brw_context *brw, - enum brw_pipeline pipeline); -void brw_enable_obj_preemption(struct brw_context *brw, bool enable); - -static inline void -brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) -{ - if (unlikely(brw->last_pipeline != pipeline)) { - assert(pipeline < BRW_NUM_PIPELINES); - brw_emit_select_pipeline(brw, pipeline); - brw->last_pipeline = pipeline; - } -} - -/*********************************************************************** - * brw_program_cache.c - */ - -void brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - GLuint aux_sz, - uint32_t *out_offset, void *out_aux); - -bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - const void *key, GLuint key_size, uint32_t *inout_offset, - void *inout_aux, bool flag_state); - -const void *brw_find_previous_compile(struct brw_cache *cache, - enum brw_cache_id cache_id, - unsigned program_string_id); - -void brw_program_cache_check_size(struct brw_context *brw); - -void brw_init_caches( struct brw_context *brw ); -void 
brw_destroy_caches( struct brw_context *brw ); - -void brw_print_program_cache(struct brw_context *brw); - -enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage); - -/* brw_batch.c */ -void brw_require_statebuffer_space(struct brw_context *brw, int size); -void *brw_state_batch(struct brw_context *brw, - int size, int alignment, uint32_t *out_offset); - -/* brw_wm_surface_state.c */ -uint32_t brw_get_surface_tiling_bits(uint32_t tiling); -uint32_t brw_get_surface_num_multisamples(unsigned num_samples); -enum isl_format brw_isl_format_for_mesa_format(mesa_format mesa_format); - -GLuint translate_tex_target(GLenum target); - -enum isl_format translate_tex_format(struct brw_context *brw, - mesa_format mesa_format, - GLenum srgb_decode); - -int brw_get_texture_swizzle(const struct gl_context *ctx, - const struct gl_texture_object *t); - -void brw_emit_buffer_surface_state(struct brw_context *brw, - uint32_t *out_offset, - struct brw_bo *bo, - unsigned buffer_offset, - unsigned surface_format, - unsigned buffer_size, - unsigned pitch, - unsigned reloc_flags); - -/* brw_sampler_state.c */ -void brw_emit_sampler_state(struct brw_context *brw, - uint32_t *sampler_state, - uint32_t batch_offset_for_sampler_state, - unsigned min_filter, - unsigned mag_filter, - unsigned mip_filter, - unsigned max_anisotropy, - unsigned address_rounding, - unsigned wrap_s, - unsigned wrap_t, - unsigned wrap_r, - unsigned base_level, - unsigned min_lod, - unsigned max_lod, - int lod_bias, - unsigned shadow_function, - bool non_normalized_coordinates, - uint32_t border_color_offset); - -/* gfx6_constant_state.c */ -void -brw_populate_constant_data(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_state *stage_state, - void *dst, - const uint32_t *param, - unsigned nr_params); -void -brw_upload_pull_constants(struct brw_context *brw, - GLbitfield64 brw_new_constbuf, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - const struct brw_stage_prog_data *prog_data); -void -brw_upload_cs_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_cs_prog_data *cs_prog_data, - struct brw_stage_state *stage_state); - -/* gfx7_vs_state.c */ -void -gfx7_upload_constant_state(struct brw_context *brw, - const struct brw_stage_state *stage_state, - bool active, unsigned opcode); - -/* brw_clip.c */ -void brw_upload_clip_prog(struct brw_context *brw); - -/* brw_sf.c */ -void brw_upload_sf_prog(struct brw_context *brw); - -bool brw_is_drawing_points(const struct brw_context *brw); -bool brw_is_drawing_lines(const struct brw_context *brw); - -/* gfx7_l3_state.c */ -void -gfx7_restore_default_l3_config(struct brw_context *brw); - -static inline bool -use_state_point_size(const struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - - /* Section 14.4 (Points) of the OpenGL 4.5 specification says: - * - * "If program point size mode is enabled, the derived point size is - * taken from the (potentially clipped) shader built-in gl_PointSize - * written by: - * - * * the geometry shader, if active; - * * the tessellation evaluation shader, if active and no - * geometry shader is active; - * * the vertex shader, otherwise - * - * and clamped to the implementation-dependent point size range. If - * the value written to gl_PointSize is less than or equal to zero, - * or if no value was written to gl_PointSize, results are undefined. 
- * If program point size mode is disabled, the derived point size is - * specified with the command - * - * void PointSize(float size); - * - * size specifies the requested size of a point. The default value - * is 1.0." - * - * The rules for GLES come from the ES 3.2, OES_geometry_point_size, and - * OES_tessellation_point_size specifications. To summarize: if the last - * stage before rasterization is a GS or TES, then use gl_PointSize from - * the shader if written. Otherwise, use 1.0. If the last stage is a - * vertex shader, use gl_PointSize, or it is undefined. - * - * We can combine these rules into a single condition for both APIs. - * Using the state point size when the last shader stage doesn't write - * gl_PointSize satisfies GL's requirements, as it's undefined. Because - * ES doesn't have a PointSize() command, the state point size will - * remain 1.0, satisfying the ES default value in the GS/TES case, and - * the VS case (1.0 works for "undefined"). Mesa sets the program point - * mode flag to always-enabled in ES, so we can safely check that, and - * it'll be ignored for ES. - * - * _NEW_PROGRAM | _NEW_POINT - * BRW_NEW_VUE_MAP_GEOM_OUT - */ - return (!ctx->VertexProgram.PointSizeEnabled && !ctx->Point._Attenuated) || - (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0; -} - -void brw_copy_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline, - const struct brw_tracked_state **atoms, - int num_atoms); -void gfx4_init_atoms(struct brw_context *brw); -void gfx45_init_atoms(struct brw_context *brw); -void gfx5_init_atoms(struct brw_context *brw); -void gfx6_init_atoms(struct brw_context *brw); -void gfx7_init_atoms(struct brw_context *brw); -void gfx75_init_atoms(struct brw_context *brw); -void gfx8_init_atoms(struct brw_context *brw); -void gfx9_init_atoms(struct brw_context *brw); -void gfx11_init_atoms(struct brw_context *brw); - -static inline uint32_t -brw_mocs(const struct isl_device *dev, struct brw_bo *bo) -{ - return isl_mocs(dev, 0, bo && bo->external); -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c deleted file mode 100644 index 7a6a8cd..0000000 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ /dev/null @@ -1,789 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_program.h" -#include "drivers/common/meta.h" -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_vs.h" -#include "brw_ff_gs.h" -#include "brw_gs.h" -#include "brw_wm.h" -#include "brw_cs.h" -#include "genxml/genX_bits.h" -#include "main/framebuffer.h" - -void -brw_enable_obj_preemption(struct brw_context *brw, bool enable) -{ - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - assert(devinfo->ver >= 9); - - if (enable == brw->object_preemption) - return; - - /* A fixed function pipe flush is required before modifying this field */ - brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); - - bool replay_mode = enable ? - GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER; - - /* enable object level preemption */ - brw_load_register_imm32(brw, CS_CHICKEN1, - replay_mode | GFX9_REPLAY_MODE_MASK); - - brw->object_preemption = enable; -} - -static void -brw_upload_gfx11_slice_hashing_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int subslices_delta = - devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1]; - if (subslices_delta == 0) - return; - - unsigned size = GFX11_SLICE_HASH_TABLE_length * 4; - uint32_t hash_address; - - uint32_t *map = brw_state_batch(brw, size, 64, &hash_address); - - unsigned idx = 0; - - unsigned sl_small = 0; - unsigned sl_big = 1; - if (subslices_delta > 0) { - sl_small = 1; - sl_big = 0; - } - - /** - * Create a 16x16 slice hashing table like the following one: - * - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * - * The table above is used when the pixel pipe 0 has less subslices than - * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table - * with 0's and 1's inverted is used. - */ - for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) { - uint32_t dw = 0; - - for (int j = 0; j < 8; j++) { - unsigned slice = idx++ % 3 ? sl_big : sl_small; - dw |= slice << (j * 4); - } - map[i] = dw; - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2)); - OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1); - ADVANCE_BATCH(); - - /* From gfx10/gfx11 workaround table in h/w specs: - * - * "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1 - * a value of 0xFFFF" - * - * This means that whenever we update a field with this instruction, we need - * to update all the others. 
- * - * Since this is the first time we emit this - * instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag - * and leaving everything else at its default state (0). - */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2)); - OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE); - ADVANCE_BATCH(); -} - -static void -brw_upload_initial_gpu_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_compiler *compiler = brw->screen->compiler; - - /* On platforms with hardware contexts, we can set our initial GPU state - * right away rather than doing it via state atoms. This saves a small - * amount of overhead on every draw call. - */ - if (!brw->hw_ctx) - return; - - if (devinfo->ver == 6) - brw_emit_post_sync_nonzero_flush(brw); - - brw_upload_invariant_state(brw); - - if (devinfo->ver == 11) { - /* Bit 5 "Headerless Message for Pre-emptable Contexts" in the SAMPLER - * MODE register defaults to 0, which means headerless sampler messages - * are not allowed for pre-emptable contexts. Set bit 5 to 1 to allow - * them. - */ - brw_load_register_imm32(brw, GFX11_SAMPLER_MODE, - HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK | - HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS); - - /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the - * HALF_SLICE_CHICKEN7 register. - */ - brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7, - TEXEL_OFFSET_FIX_MASK | - TEXEL_OFFSET_FIX_ENABLE); - - /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set - * in the L3CNTLREG register. The default setting of the bit is not the - * desired behavior. - */ - brw_load_register_imm32(brw, GFX8_L3CNTLREG, - GFX8_L3CNTLREG_EDBC_NO_HANG); - } - - /* The hardware specification recommends disabling repacking for - * compatibility with the decompression mechanism in the display - * controller. - */ - if (devinfo->disable_ccs_repack) { - brw_load_register_imm32(brw, GFX7_CACHE_MODE_0, - GFX11_DISABLE_REPACKING_FOR_COMPRESSION | - REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION)); - } - - if (devinfo->ver == 9) { - /* Recommended optimizations for Victim Cache eviction and floating - * point blending. - */ - brw_load_register_imm32(brw, GFX7_CACHE_MODE_1, - REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) | - REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) | - REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) | - GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE | - GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT | - GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC); - } - - if (devinfo->ver >= 8) { - gfx8_emit_3dstate_sample_pattern(brw); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so - * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address. - * - * This is only safe on kernels with context isolation support.
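The REG_MASK() pattern used throughout the function above reflects that these are masked registers: the high 16 bits of the written value select which of the low 16 bits the write actually changes, so unrelated bits are left alone. A sketch, assuming i965's definition REG_MASK(x) == ((x) << 16):

   #include <stdint.h>

   #define EXAMPLE_BIT         (1 << 2)                    /* some single-bit field */
   #define EXAMPLE_REG_MASK(x) ((uint32_t)(x) << 16)       /* assumed; mirrors REG_MASK */

   /* Payload that sets the bit: mask in the high half, value in the low half. */
   static const uint32_t set_payload   = EXAMPLE_REG_MASK(EXAMPLE_BIT) | EXAMPLE_BIT;
   /* Payload that clears the bit: mask present, value bit absent. */
   static const uint32_t clear_payload = EXAMPLE_REG_MASK(EXAMPLE_BIT);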
- */ - if (!compiler->constant_buffer_0_is_relative) { - if (devinfo->ver >= 9) { - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(CS_DEBUG_MODE2); - OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) | - CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE); - ADVANCE_BATCH(); - } else if (devinfo->ver == 8) { - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(INSTPM); - OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) | - INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE); - ADVANCE_BATCH(); - } - } - - brw->object_preemption = false; - - if (devinfo->ver >= 10) - brw_enable_obj_preemption(brw, true); - - if (devinfo->ver == 11) - brw_upload_gfx11_slice_hashing_state(brw); -} - -static inline const struct brw_tracked_state * -brw_get_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - switch (pipeline) { - case BRW_RENDER_PIPELINE: - return brw->render_atoms; - case BRW_COMPUTE_PIPELINE: - return brw->compute_atoms; - default: - STATIC_ASSERT(BRW_NUM_PIPELINES == 2); - unreachable("Unsupported pipeline"); - return NULL; - } -} - -void -brw_copy_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline, - const struct brw_tracked_state **atoms, - int num_atoms) -{ - /* This is to work around brw_context::atoms being declared const. We want - * it to be const, but it needs to be initialized somehow! - */ - struct brw_tracked_state *context_atoms = - (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline); - - for (int i = 0; i < num_atoms; i++) { - context_atoms[i] = *atoms[i]; - assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw); - assert(context_atoms[i].emit); - } - - brw->num_atoms[pipeline] = num_atoms; -} - -void brw_init_state( struct brw_context *brw ) -{ - struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Force the first brw_select_pipeline to emit pipeline select */ - brw->last_pipeline = BRW_NUM_PIPELINES; - - brw_init_caches(brw); - - if (devinfo->ver >= 11) - gfx11_init_atoms(brw); - else if (devinfo->ver >= 10) - unreachable("Gfx10 support dropped."); - else if (devinfo->ver >= 9) - gfx9_init_atoms(brw); - else if (devinfo->ver >= 8) - gfx8_init_atoms(brw); - else if (devinfo->verx10 >= 75) - gfx75_init_atoms(brw); - else if (devinfo->ver >= 7) - gfx7_init_atoms(brw); - else if (devinfo->ver >= 6) - gfx6_init_atoms(brw); - else if (devinfo->ver >= 5) - gfx5_init_atoms(brw); - else if (devinfo->verx10 >= 45) - gfx45_init_atoms(brw); - else - gfx4_init_atoms(brw); - - brw_upload_initial_gpu_state(brw); - - brw->NewGLState = ~0; - brw->ctx.NewDriverState = ~0ull; - - /* ~0 is a nonsensical value which won't match anything we program, so - * the programming will take effect on the first time around. - */ - brw->pma_stall_bits = ~0; - - /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible - * dirty flags. 
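The ~0 seeding of pma_stall_bits above is the usual impossible-cached-value idiom: pick a sentinel the driver would never program so the first real update always mismatches. In isolation (editor's sketch with hypothetical names):

   #include <stdbool.h>
   #include <stdint.h>

   struct cached_reg {
      uint32_t value;
   };

   static void
   cached_reg_init(struct cached_reg *reg)
   {
      reg->value = ~0u; /* nonsensical: matches nothing we would program */
   }

   /* Returns true when the caller needs to emit the state packet. */
   static bool
   cached_reg_update(struct cached_reg *reg, uint32_t new_value)
   {
      if (reg->value == new_value)
         return false;        /* redundant; skip the hardware write */
      reg->value = new_value;
      return true;
   }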
- */ - STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState)); - - ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK; - ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK; - ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD; - ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER; - ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS; - ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS; - ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION; -} - - -void brw_destroy_state( struct brw_context *brw ) -{ - brw_destroy_caches(brw); -} - -/*********************************************************************** - */ - -static bool -check_state(const struct brw_state_flags *a, const struct brw_state_flags *b) -{ - return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0; -} - -static void -accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b) -{ - a->mesa |= b->mesa; - a->brw |= b->brw; -} - - -static void -xor_states(struct brw_state_flags *result, - const struct brw_state_flags *a, - const struct brw_state_flags *b) -{ - result->mesa = a->mesa ^ b->mesa; - result->brw = a->brw ^ b->brw; -} - -struct dirty_bit_map { - uint64_t bit; - char *name; - uint32_t count; -}; - -#define DEFINE_BIT(name) {name, #name, 0} - -static struct dirty_bit_map mesa_bits[] = { - DEFINE_BIT(_NEW_MODELVIEW), - DEFINE_BIT(_NEW_PROJECTION), - DEFINE_BIT(_NEW_TEXTURE_MATRIX), - DEFINE_BIT(_NEW_COLOR), - DEFINE_BIT(_NEW_DEPTH), - DEFINE_BIT(_NEW_FOG), - DEFINE_BIT(_NEW_HINT), - DEFINE_BIT(_NEW_LIGHT), - DEFINE_BIT(_NEW_LINE), - DEFINE_BIT(_NEW_PIXEL), - DEFINE_BIT(_NEW_POINT), - DEFINE_BIT(_NEW_POLYGON), - DEFINE_BIT(_NEW_POLYGONSTIPPLE), - DEFINE_BIT(_NEW_SCISSOR), - DEFINE_BIT(_NEW_STENCIL), - DEFINE_BIT(_NEW_TEXTURE_OBJECT), - DEFINE_BIT(_NEW_TRANSFORM), - DEFINE_BIT(_NEW_VIEWPORT), - DEFINE_BIT(_NEW_TEXTURE_STATE), - DEFINE_BIT(_NEW_RENDERMODE), - DEFINE_BIT(_NEW_BUFFERS), - DEFINE_BIT(_NEW_CURRENT_ATTRIB), - DEFINE_BIT(_NEW_MULTISAMPLE), - DEFINE_BIT(_NEW_TRACK_MATRIX), - DEFINE_BIT(_NEW_PROGRAM), - DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), - DEFINE_BIT(_NEW_FRAG_CLAMP), - {0, 0, 0} -}; - -static struct dirty_bit_map brw_bits[] = { - DEFINE_BIT(BRW_NEW_FS_PROG_DATA), - DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA), - DEFINE_BIT(BRW_NEW_SF_PROG_DATA), - DEFINE_BIT(BRW_NEW_VS_PROG_DATA), - DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA), - DEFINE_BIT(BRW_NEW_GS_PROG_DATA), - DEFINE_BIT(BRW_NEW_TCS_PROG_DATA), - DEFINE_BIT(BRW_NEW_TES_PROG_DATA), - DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA), - DEFINE_BIT(BRW_NEW_CS_PROG_DATA), - DEFINE_BIT(BRW_NEW_URB_FENCE), - DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), - DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM), - DEFINE_BIT(BRW_NEW_TESS_PROGRAMS), - DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), - DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PRIMITIVE), - DEFINE_BIT(BRW_NEW_CONTEXT), - DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_SURFACES), - DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS), - DEFINE_BIT(BRW_NEW_INDICES), - DEFINE_BIT(BRW_NEW_VERTICES), - DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS), - DEFINE_BIT(BRW_NEW_BATCH), - DEFINE_BIT(BRW_NEW_INDEX_BUFFER), - DEFINE_BIT(BRW_NEW_VS_CONSTBUF), - DEFINE_BIT(BRW_NEW_TCS_CONSTBUF), - 
DEFINE_BIT(BRW_NEW_TES_CONSTBUF), - DEFINE_BIT(BRW_NEW_GS_CONSTBUF), - DEFINE_BIT(BRW_NEW_PROGRAM_CACHE), - DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), - DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT), - DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK), - DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD), - DEFINE_BIT(BRW_NEW_STATS_WM), - DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER), - DEFINE_BIT(BRW_NEW_IMAGE_UNITS), - DEFINE_BIT(BRW_NEW_META_IN_PROGRESS), - DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), - DEFINE_BIT(BRW_NEW_NUM_SAMPLES), - DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER), - DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE), - DEFINE_BIT(BRW_NEW_CC_VP), - DEFINE_BIT(BRW_NEW_SF_VP), - DEFINE_BIT(BRW_NEW_CLIP_VP), - DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE), - DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), - DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), - DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS), - DEFINE_BIT(BRW_NEW_URB_SIZE), - DEFINE_BIT(BRW_NEW_CC_STATE), - DEFINE_BIT(BRW_NEW_BLORP), - DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT), - DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION), - DEFINE_BIT(BRW_NEW_DRAW_CALL), - DEFINE_BIT(BRW_NEW_AUX_STATE), - {0, 0, 0} -}; - -static void -brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits) -{ - for (int i = 0; bit_map[i].bit != 0; i++) { - if (bit_map[i].bit & bits) - bit_map[i].count++; - } -} - -static void -brw_print_dirty_count(struct dirty_bit_map *bit_map) -{ - for (int i = 0; bit_map[i].bit != 0; i++) { - if (bit_map[i].count > 1) { - fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n", - bit_map[i].bit, bit_map[i].count, bit_map[i].name); - } - } -} - -static inline void -brw_upload_tess_programs(struct brw_context *brw) -{ - if (brw->programs[MESA_SHADER_TESS_EVAL]) { - brw_upload_tcs_prog(brw); - brw_upload_tes_prog(brw); - } else { - brw->tcs.base.prog_data = NULL; - brw->tes.base.prog_data = NULL; - } -} - -static inline void -brw_upload_programs(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (pipeline == BRW_RENDER_PIPELINE) { - brw_upload_vs_prog(brw); - brw_upload_tess_programs(brw); - - if (brw->programs[MESA_SHADER_GEOMETRY]) { - brw_upload_gs_prog(brw); - } else { - brw->gs.base.prog_data = NULL; - if (devinfo->ver < 7) - brw_upload_ff_gs_prog(brw); - } - - /* Update the VUE map for data exiting the GS stage of the pipeline. - * This comes from the last enabled shader stage. - */ - GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid; - bool old_separate = brw->vue_map_geom_out.separate; - struct brw_vue_prog_data *vue_prog_data; - if (brw->programs[MESA_SHADER_GEOMETRY]) - vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data); - else if (brw->programs[MESA_SHADER_TESS_EVAL]) - vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data); - else - vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data); - - brw->vue_map_geom_out = vue_prog_data->vue_map; - - /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */ - if (old_slots != brw->vue_map_geom_out.slots_valid || - old_separate != brw->vue_map_geom_out.separate) - brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT; - - if ((old_slots ^ brw->vue_map_geom_out.slots_valid) & - VARYING_BIT_VIEWPORT) { - ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT; - brw->clip.viewport_count = - (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ? 
- ctx->Const.MaxViewports : 1; - } - - brw_upload_wm_prog(brw); - - if (devinfo->ver < 6) { - brw_upload_clip_prog(brw); - brw_upload_sf_prog(brw); - } - - brw_disk_cache_write_render_programs(brw); - } else if (pipeline == BRW_COMPUTE_PIPELINE) { - brw_upload_cs_prog(brw); - brw_disk_cache_write_compute_program(brw); - } -} - -static inline void -merge_ctx_state(struct brw_context *brw, - struct brw_state_flags *state) -{ - state->mesa |= brw->NewGLState; - state->brw |= brw->ctx.NewDriverState; -} - -static ALWAYS_INLINE void -check_and_emit_atom(struct brw_context *brw, - struct brw_state_flags *state, - const struct brw_tracked_state *atom) -{ - if (check_state(state, &atom->dirty)) { - atom->emit(brw); - merge_ctx_state(brw, state); - } -} - -static inline void -brw_upload_pipeline_state(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - int i; - static int dirty_count = 0; - struct brw_state_flags state = brw->state.pipelines[pipeline]; - const unsigned fb_samples = - MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); - - brw_select_pipeline(brw, pipeline); - - if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1) - brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1); - - if (INTEL_DEBUG(DEBUG_REEMIT)) { - /* Always re-emit all state. */ - brw->NewGLState = ~0; - ctx->NewDriverState = ~0ull; - } - - if (pipeline == BRW_RENDER_PIPELINE) { - if (brw->programs[MESA_SHADER_FRAGMENT] != - ctx->FragmentProgram._Current) { - brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; - } - - if (brw->programs[MESA_SHADER_TESS_EVAL] != - ctx->TessEvalProgram._Current) { - brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS; - } - - if (brw->programs[MESA_SHADER_TESS_CTRL] != - ctx->TessCtrlProgram._Current) { - brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS; - } - - if (brw->programs[MESA_SHADER_GEOMETRY] != - ctx->GeometryProgram._Current) { - brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM; - } - - if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) { - brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM; - } - } - - if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) { - brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM; - } - - if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) { - brw->meta_in_progress = _mesa_meta_in_progress(ctx); - brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS; - } - - if (brw->num_samples != fb_samples) { - brw->num_samples = fb_samples; - brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES; - } - - /* Exit early if no state is flagged as dirty */ - merge_ctx_state(brw, &state); - if ((state.mesa | state.brw) == 0) - return; - - /* Emit Sandybridge workaround flushes on every primitive, for safety. 
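
The INTEL_DEBUG(DEBUG_ANY) branch in the next hunk enforces an ordering
invariant over the atom list: once an atom's dirty bits have been examined,
no later atom may newly raise them, or the earlier atom ran against stale
state. The check, distilled to a single 64-bit dirty word (emit_if_dirty and
listens_to are illustrative names, not driver symbols):

   uint64_t examined = 0; /* bits some earlier atom already looked at */

   for (int i = 0; i < n; i++) {
      uint64_t before = state;
      emit_if_dirty(&state, &atoms[i]);    /* may OR in new dirty bits */
      uint64_t generated = before ^ state; /* bits this atom just set */
      assert((examined & generated) == 0); /* else the list is mis-ordered */
      examined |= atoms[i].listens_to;
   }
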
*/ - if (devinfo->ver == 6) - brw_emit_post_sync_nonzero_flush(brw); - - brw_upload_programs(brw, pipeline); - merge_ctx_state(brw, &state); - - brw->vtbl.emit_state_base_address(brw); - - const struct brw_tracked_state *atoms = - brw_get_pipeline_atoms(brw, pipeline); - const int num_atoms = brw->num_atoms[pipeline]; - - if (INTEL_DEBUG(DEBUG_ANY)) { - /* Debug version which enforces various sanity checks on the - * state flags which are generated and checked to help ensure - * state atoms are ordered correctly in the list. - */ - struct brw_state_flags examined, prev; - memset(&examined, 0, sizeof(examined)); - prev = state; - - for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = &atoms[i]; - struct brw_state_flags generated; - - check_and_emit_atom(brw, &state, atom); - - accumulate_state(&examined, &atom->dirty); - - /* generated = (prev ^ state) - * if (examined & generated) - * fail; - */ - xor_states(&generated, &prev, &state); - assert(!check_state(&examined, &generated)); - prev = state; - } - } - else { - for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = &atoms[i]; - - check_and_emit_atom(brw, &state, atom); - } - } - - if (INTEL_DEBUG(DEBUG_STATE)) { - STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1); - - brw_update_dirty_count(mesa_bits, state.mesa); - brw_update_dirty_count(brw_bits, state.brw); - if (dirty_count++ % 1000 == 0) { - brw_print_dirty_count(mesa_bits); - brw_print_dirty_count(brw_bits); - fprintf(stderr, "\n"); - } - } -} - -/*********************************************************************** - * Emit all state: - */ -void brw_upload_render_state(struct brw_context *brw) -{ - brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE); -} - -static inline void -brw_pipeline_state_finished(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - /* Save all dirty state into the other pipelines */ - for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) { - if (i != pipeline) { - brw->state.pipelines[i].mesa |= brw->NewGLState; - brw->state.pipelines[i].brw |= brw->ctx.NewDriverState; - } else { - memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags)); - } - } - - brw->NewGLState = 0; - brw->ctx.NewDriverState = 0ull; -} - -/** - * Clear dirty bits to account for the fact that the state emitted by - * brw_upload_render_state() has been committed to the hardware. This is a - * separate call from brw_upload_render_state() because it's possible that - * after the call to brw_upload_render_state(), we will discover that we've - * run out of aperture space, and need to rewind the batch buffer to the state - * it had before the brw_upload_render_state() call. - */ -void -brw_render_state_finished(struct brw_context *brw) -{ - brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE); -} - -void -brw_upload_compute_state(struct brw_context *brw) -{ - brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE); -} - -void -brw_compute_state_finished(struct brw_context *brw) -{ - brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE); -} diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h deleted file mode 100644 index c2d99be..0000000 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STRUCTS_H -#define BRW_STRUCTS_H - -struct brw_urb_fence -{ - struct - { - unsigned length:8; - unsigned vs_realloc:1; - unsigned gs_realloc:1; - unsigned clp_realloc:1; - unsigned sf_realloc:1; - unsigned vfe_realloc:1; - unsigned cs_realloc:1; - unsigned pad:2; - unsigned opcode:16; - } header; - - struct - { - unsigned vs_fence:10; - unsigned gs_fence:10; - unsigned clp_fence:10; - unsigned pad:2; - } bits0; - - struct - { - unsigned sf_fence:10; - unsigned vf_fence:10; - unsigned cs_fence:11; - unsigned pad:1; - } bits1; -}; - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c deleted file mode 100644 index 235c15f..0000000 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ -#include "main/mtypes.h" - -#include "isl/isl.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -enum isl_format -brw_isl_format_for_mesa_format(mesa_format mesa_format) -{ - /* This table is ordered according to the enum ordering in formats.h. 
We do
-    * expect that enum to be extended without our explicit initialization
-    * staying in sync, so every entry is explicitly initialized to
-    * ISL_FORMAT_UNSUPPORTED rather than left zero-initialized, because 0
-    * happens to be a valid format (ISL_FORMAT_R32G32B32A32_FLOAT).
-    */
-   static const enum isl_format table[MESA_FORMAT_COUNT] = {
-      [0 ... MESA_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
-
-      [MESA_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
-      [MESA_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
-      [MESA_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
-      [MESA_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
-      [MESA_FORMAT_RGB_UNORM8] = ISL_FORMAT_R8G8B8_UNORM,
-      [MESA_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
-      [MESA_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
-      [MESA_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
-      [MESA_FORMAT_LA_UNORM8] = ISL_FORMAT_L8A8_UNORM,
-      [MESA_FORMAT_LA_UNORM16] = ISL_FORMAT_L16A16_UNORM,
-      [MESA_FORMAT_A_UNORM8] = ISL_FORMAT_A8_UNORM,
-      [MESA_FORMAT_A_UNORM16] = ISL_FORMAT_A16_UNORM,
-      [MESA_FORMAT_L_UNORM8] = ISL_FORMAT_L8_UNORM,
-      [MESA_FORMAT_L_UNORM16] = ISL_FORMAT_L16_UNORM,
-      [MESA_FORMAT_I_UNORM8] = ISL_FORMAT_I8_UNORM,
-      [MESA_FORMAT_I_UNORM16] = ISL_FORMAT_I16_UNORM,
-      [MESA_FORMAT_YCBCR_REV] = ISL_FORMAT_YCRCB_NORMAL,
-      [MESA_FORMAT_YCBCR] = ISL_FORMAT_YCRCB_SWAPUVY,
-      [MESA_FORMAT_R_UNORM8] = ISL_FORMAT_R8_UNORM,
-      [MESA_FORMAT_RG_UNORM8] = ISL_FORMAT_R8G8_UNORM,
-      [MESA_FORMAT_R_UNORM16] = ISL_FORMAT_R16_UNORM,
-      [MESA_FORMAT_RG_UNORM16] = ISL_FORMAT_R16G16_UNORM,
-      [MESA_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
-      [MESA_FORMAT_S_UINT8] = ISL_FORMAT_R8_UINT,
-
-      [MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
-      [MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
-      [MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
-      [MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
-      [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
-      [MESA_FORMAT_LA_SRGB8] = ISL_FORMAT_L8A8_UNORM_SRGB,
-      [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
-      [MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
-      [MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
-      [MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,
-
-      [MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
-      [MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
-      [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
-      [MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
-      [MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
-      [MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
-
-      [MESA_FORMAT_RGBA_FLOAT32] = ISL_FORMAT_R32G32B32A32_FLOAT,
-      [MESA_FORMAT_RGBA_FLOAT16] = ISL_FORMAT_R16G16B16A16_FLOAT,
-      [MESA_FORMAT_RGB_FLOAT32] = ISL_FORMAT_R32G32B32_FLOAT,
-      [MESA_FORMAT_A_FLOAT32] = ISL_FORMAT_A32_FLOAT,
-      [MESA_FORMAT_A_FLOAT16] = ISL_FORMAT_A16_FLOAT,
-      [MESA_FORMAT_L_FLOAT32] = ISL_FORMAT_L32_FLOAT,
-      [MESA_FORMAT_L_FLOAT16] = ISL_FORMAT_L16_FLOAT,
-      [MESA_FORMAT_LA_FLOAT32] = ISL_FORMAT_L32A32_FLOAT,
-      [MESA_FORMAT_LA_FLOAT16] = ISL_FORMAT_L16A16_FLOAT,
-      [MESA_FORMAT_I_FLOAT32] = ISL_FORMAT_I32_FLOAT,
-      [MESA_FORMAT_I_FLOAT16] = ISL_FORMAT_I16_FLOAT,
-      [MESA_FORMAT_R_FLOAT32] = ISL_FORMAT_R32_FLOAT,
-      [MESA_FORMAT_R_FLOAT16] = ISL_FORMAT_R16_FLOAT,
-      [MESA_FORMAT_RG_FLOAT32] = ISL_FORMAT_R32G32_FLOAT,
-      [MESA_FORMAT_RG_FLOAT16] = ISL_FORMAT_R16G16_FLOAT,
-
-      [MESA_FORMAT_R_SINT8] = ISL_FORMAT_R8_SINT,
-      [MESA_FORMAT_RG_SINT8] = ISL_FORMAT_R8G8_SINT,
-      [MESA_FORMAT_RGB_SINT8] = ISL_FORMAT_R8G8B8_SINT,
-      [MESA_FORMAT_RGBA_SINT8] = ISL_FORMAT_R8G8B8A8_SINT,
-      [MESA_FORMAT_R_SINT16] = ISL_FORMAT_R16_SINT,
-      [MESA_FORMAT_RG_SINT16] =
ISL_FORMAT_R16G16_SINT, - [MESA_FORMAT_RGB_SINT16] = ISL_FORMAT_R16G16B16_SINT, - [MESA_FORMAT_RGBA_SINT16] = ISL_FORMAT_R16G16B16A16_SINT, - [MESA_FORMAT_R_SINT32] = ISL_FORMAT_R32_SINT, - [MESA_FORMAT_RG_SINT32] = ISL_FORMAT_R32G32_SINT, - [MESA_FORMAT_RGB_SINT32] = ISL_FORMAT_R32G32B32_SINT, - [MESA_FORMAT_RGBA_SINT32] = ISL_FORMAT_R32G32B32A32_SINT, - - [MESA_FORMAT_R_UINT8] = ISL_FORMAT_R8_UINT, - [MESA_FORMAT_RG_UINT8] = ISL_FORMAT_R8G8_UINT, - [MESA_FORMAT_RGB_UINT8] = ISL_FORMAT_R8G8B8_UINT, - [MESA_FORMAT_RGBA_UINT8] = ISL_FORMAT_R8G8B8A8_UINT, - [MESA_FORMAT_R_UINT16] = ISL_FORMAT_R16_UINT, - [MESA_FORMAT_RG_UINT16] = ISL_FORMAT_R16G16_UINT, - [MESA_FORMAT_RGB_UINT16] = ISL_FORMAT_R16G16B16_UINT, - [MESA_FORMAT_RGBA_UINT16] = ISL_FORMAT_R16G16B16A16_UINT, - [MESA_FORMAT_R_UINT32] = ISL_FORMAT_R32_UINT, - [MESA_FORMAT_RG_UINT32] = ISL_FORMAT_R32G32_UINT, - [MESA_FORMAT_RGB_UINT32] = ISL_FORMAT_R32G32B32_UINT, - [MESA_FORMAT_RGBA_UINT32] = ISL_FORMAT_R32G32B32A32_UINT, - - [MESA_FORMAT_R_SNORM8] = ISL_FORMAT_R8_SNORM, - [MESA_FORMAT_RG_SNORM8] = ISL_FORMAT_R8G8_SNORM, - [MESA_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM, - [MESA_FORMAT_R_SNORM16] = ISL_FORMAT_R16_SNORM, - [MESA_FORMAT_RG_SNORM16] = ISL_FORMAT_R16G16_SNORM, - [MESA_FORMAT_RGB_SNORM16] = ISL_FORMAT_R16G16B16_SNORM, - [MESA_FORMAT_RGBA_SNORM16] = ISL_FORMAT_R16G16B16A16_SNORM, - [MESA_FORMAT_RGBA_UNORM16] = ISL_FORMAT_R16G16B16A16_UNORM, - - [MESA_FORMAT_R_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM, - [MESA_FORMAT_R_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM, - [MESA_FORMAT_RG_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM, - [MESA_FORMAT_RG_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM, - - [MESA_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8, - [MESA_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8, - [MESA_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8, - [MESA_FORMAT_ETC2_RGBA8_EAC] = ISL_FORMAT_ETC2_EAC_RGBA8, - [MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = ISL_FORMAT_ETC2_EAC_SRGB8_A8, - [MESA_FORMAT_ETC2_R11_EAC] = ISL_FORMAT_EAC_R11, - [MESA_FORMAT_ETC2_RG11_EAC] = ISL_FORMAT_EAC_RG11, - [MESA_FORMAT_ETC2_SIGNED_R11_EAC] = ISL_FORMAT_EAC_SIGNED_R11, - [MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = ISL_FORMAT_EAC_SIGNED_RG11, - [MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_RGB8_PTA, - [MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_SRGB8_PTA, - - [MESA_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM, - [MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM] = ISL_FORMAT_BC7_UNORM_SRGB, - [MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = ISL_FORMAT_BC6H_SF16, - [MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = ISL_FORMAT_BC6H_UF16, - - [MESA_FORMAT_RGBA_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16, - [MESA_FORMAT_RGBA_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16, - [MESA_FORMAT_RGBA_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16, - [MESA_FORMAT_RGBA_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16, - [MESA_FORMAT_RGBA_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16, - [MESA_FORMAT_RGBA_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16, - [MESA_FORMAT_RGBA_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16, - 
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,
-
-      [MESA_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
-      [MESA_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
-
-      [MESA_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
-      [MESA_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
-      [MESA_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,
-
-      [MESA_FORMAT_B5G5R5X1_UNORM] = ISL_FORMAT_B5G5R5X1_UNORM,
-      [MESA_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
-      [MESA_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
-      [MESA_FORMAT_RGBX_UNORM16] = ISL_FORMAT_R16G16B16X16_UNORM,
-      [MESA_FORMAT_RGBX_FLOAT16] = ISL_FORMAT_R16G16B16X16_FLOAT,
-      [MESA_FORMAT_RGBX_FLOAT32] = ISL_FORMAT_R32G32B32X32_FLOAT,
-   };
-
-   assert(mesa_format < MESA_FORMAT_COUNT);
-   return table[mesa_format];
-}
-
-void
-brw_screen_init_surface_formats(struct brw_screen *screen)
-{
-   const struct intel_device_info *devinfo = &screen->devinfo;
-   mesa_format format;
-
-   memset(&screen->mesa_format_supports_texture, 0,
-          sizeof(screen->mesa_format_supports_texture));
-
-   for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
-      if (!_mesa_get_format_name(format))
-         continue;
-      enum isl_format texture, render;
-      bool is_integer = _mesa_is_format_integer_color(format);
-
-      render = texture = brw_isl_format_for_mesa_format(format);
-
-      /* Only exposed with EXT_memory_object_* support, which is not
-       * available on older generations.
-       */
-      if (devinfo->ver < 7 && format == MESA_FORMAT_Z_UNORM16)
-         continue;
-
-      if (texture == ISL_FORMAT_UNSUPPORTED)
-         continue;
-
-      /* Don't advertise 8 and 16-bit RGB formats to core mesa.  This ensures
-       * that they are renderable from an API perspective since core mesa
-       * will fall back to RGBA or RGBX (we can't render to non-power-of-two
-       * formats).  For 8-bit formats, this also keeps us from hitting some
-       * nasty corners in brw_miptree_map_blit if you ever try to map one.
-       */
-      int format_size = _mesa_get_format_bytes(format);
-      if (format_size == 3 || format_size == 6)
-         continue;
-
-      if (isl_format_supports_sampling(devinfo, texture) &&
-          (isl_format_supports_filtering(devinfo, texture) || is_integer))
-         screen->mesa_format_supports_texture[format] = true;
-
-      /* Re-map some render target formats to make them supported when they
-       * wouldn't be using their format for texturing.
-       */
-      switch (render) {
-      /* For these formats, we just need to read/write the first
-       * channel into R, which is to say that we just treat them as
-       * GL_RED.
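
Stepping back to the lookup table that opens this hunk: it relies on GCC's
range-designator extension so that any Mesa format without an explicit entry
reads back as unsupported. A self-contained sketch of the idiom (toy names,
compiles with GCC or Clang):

   #include <stdio.h>

   enum fmt { FMT_UNSUPPORTED = 0xffff, FMT_R8 = 1 };
   enum src { SRC_A, SRC_B, SRC_COUNT };

   static const enum fmt table[SRC_COUNT] = {
      [0 ... SRC_COUNT - 1] = FMT_UNSUPPORTED, /* GNU extension: fill all */
      [SRC_B] = FMT_R8,                        /* later designators win */
   };

   int main(void) {
      printf("%x %x\n", table[SRC_A], table[SRC_B]); /* prints: ffff 1 */
      return 0;
   }
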
- */ - case ISL_FORMAT_I32_FLOAT: - case ISL_FORMAT_L32_FLOAT: - render = ISL_FORMAT_R32_FLOAT; - break; - case ISL_FORMAT_I16_FLOAT: - case ISL_FORMAT_L16_FLOAT: - render = ISL_FORMAT_R16_FLOAT; - break; - case ISL_FORMAT_I8_UNORM: - case ISL_FORMAT_L8_UNORM: - render = ISL_FORMAT_R8_UNORM; - break; - case ISL_FORMAT_I16_UNORM: - case ISL_FORMAT_L16_UNORM: - render = ISL_FORMAT_R16_UNORM; - break; - case ISL_FORMAT_R16G16B16X16_UNORM: - render = ISL_FORMAT_R16G16B16A16_UNORM; - break; - case ISL_FORMAT_R16G16B16X16_FLOAT: - render = ISL_FORMAT_R16G16B16A16_FLOAT; - break; - case ISL_FORMAT_B8G8R8X8_UNORM: - /* XRGB is handled as ARGB because the chips in this family - * cannot render to XRGB targets. This means that we have to - * mask writes to alpha (ala glColorMask) and reconfigure the - * alpha blending hardware to use GL_ONE (or GL_ZERO) for - * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is - * used. On Gfx8+ BGRX is actually allowed (but not RGBX). - */ - if (!isl_format_supports_rendering(devinfo, texture)) - render = ISL_FORMAT_B8G8R8A8_UNORM; - break; - case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: - if (!isl_format_supports_rendering(devinfo, texture)) - render = ISL_FORMAT_B8G8R8A8_UNORM_SRGB; - break; - case ISL_FORMAT_R8G8B8X8_UNORM: - render = ISL_FORMAT_R8G8B8A8_UNORM; - break; - case ISL_FORMAT_R8G8B8X8_UNORM_SRGB: - render = ISL_FORMAT_R8G8B8A8_UNORM_SRGB; - break; - default: - break; - } - - /* Note that GL_EXT_texture_integer says that blending doesn't occur for - * integer, so we don't need hardware support for blending on it. Other - * than that, GL in general requires alpha blending for render targets, - * even though we don't support it for some formats. - */ - if (isl_format_supports_rendering(devinfo, render) && - (isl_format_supports_alpha_blending(devinfo, render) || is_integer)) { - screen->mesa_to_isl_render_format[format] = render; - screen->mesa_format_supports_render[format] = true; - } - } - - /* We will check this table for FBO completeness, but the surface format - * table above only covered color rendering. - */ - screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_S8_UINT] = true; - screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_X8_UINT] = true; - screen->mesa_format_supports_render[MESA_FORMAT_S_UINT8] = true; - screen->mesa_format_supports_render[MESA_FORMAT_Z_FLOAT32] = true; - screen->mesa_format_supports_render[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true; - if (devinfo->ver >= 8) - screen->mesa_format_supports_render[MESA_FORMAT_Z_UNORM16] = true; - - /* We remap depth formats to a supported texturing format in - * translate_tex_format(). - */ - screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_S8_UINT] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_X8_UINT] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_Z_FLOAT32] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_S_UINT8] = true; - - /* Benchmarking shows that Z16 is slower than Z24, so there's no reason to - * use it unless you're under memory (not memory bandwidth) pressure. - * - * Apparently, the GPU's depth scoreboarding works on a 32-bit granularity, - * which corresponds to one pixel in the depth buffer for Z24 or Z32 formats. - * However, it corresponds to two pixels with Z16, which means both need to - * hit the early depth case in order for it to happen. 
- *
-    * Other speculation is that we may be hitting increased fragment shader
-    * execution from GL_LEQUAL/GL_EQUAL depth tests at reduced precision.
-    *
-    * With the PMA stall workaround in place, Z16 is faster than Z24, as it
-    * should be.
-    */
-   if (devinfo->ver >= 8)
-      screen->mesa_format_supports_texture[MESA_FORMAT_Z_UNORM16] = true;
-
-   /* The RGBX formats are not renderable.  Normally these get mapped
-    * internally to RGBA formats when rendering.  However on Gfx9+ when this
-    * internal override is used fast clears don't work so they are disabled
-    * in brw_meta_fast_clear.  To avoid this problem we can just pretend not
-    * to support RGBX formats at all.  This will cause the upper layers of
-    * Mesa to pick the RGBA formats instead.  This works fine because when it
-    * is used as a texture source the swizzle state is programmed to force
-    * the alpha channel to 1.0 anyway.  We could also do this for all gens
-    * except that it's a bit more difficult when the hardware doesn't support
-    * texture swizzling.  Generations that use blorp have further problems
-    * because blorp doesn't implement this swizzle override.  We don't need
-    * to do this for BGRX because that actually is supported natively on
-    * Gfx8+.
-    */
-   if (devinfo->ver >= 9) {
-      static const mesa_format rgbx_formats[] = {
-         MESA_FORMAT_R8G8B8X8_UNORM,
-         MESA_FORMAT_R8G8B8X8_SRGB,
-         MESA_FORMAT_RGBX_UNORM16,
-         MESA_FORMAT_RGBX_FLOAT16,
-         MESA_FORMAT_RGBX_FLOAT32
-      };
-
-      for (int i = 0; i < ARRAY_SIZE(rgbx_formats); i++) {
-         screen->mesa_format_supports_texture[rgbx_formats[i]] = false;
-         screen->mesa_format_supports_render[rgbx_formats[i]] = false;
-      }
-   }
-
-   /* On hardware that lacks support for ETC1, we map ETC1 to RGBX
-    * during glCompressedTexImage2D().  See brw_mipmap_tree::wraps_etc1.
-    */
-   screen->mesa_format_supports_texture[MESA_FORMAT_ETC1_RGB8] = true;
-
-   /* On hardware that lacks support for ETC2, we map ETC2 to a suitable
-    * MESA_FORMAT during glCompressedTexImage2D().
-    * See brw_mipmap_tree::wraps_etc2.
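
The shape of that ETC fallback, sketched with illustrative field names (the
miptree's real layout differs):

   struct etc_shadow {
      bool wraps_etc;            /* set when the HW can't sample ETC */
      mesa_format api_format;    /* what the app uploaded (ETC1/ETC2) */
      mesa_format shadow_format; /* uncompressed format we allocate and
                                  * actually sample from */
   };

The ETC blocks are decoded into shadow_format texels at upload time, while
api_format is remembered so API queries still report the compressed format
the application asked for.
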
- */ - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGBA8_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_R11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RG11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true; -} - -void -brw_init_surface_formats(struct brw_context *brw) -{ - struct brw_screen *screen = brw->screen; - struct gl_context *ctx = &brw->ctx; - - brw->mesa_format_supports_render = screen->mesa_format_supports_render; - brw->mesa_to_isl_render_format = screen->mesa_to_isl_render_format; - - STATIC_ASSERT(ARRAY_SIZE(ctx->TextureFormatSupported) == - ARRAY_SIZE(screen->mesa_format_supports_texture)); - - for (unsigned i = 0; i < ARRAY_SIZE(ctx->TextureFormatSupported); ++i) { - ctx->TextureFormatSupported[i] = screen->mesa_format_supports_texture[i]; - } -} - -bool -brw_render_target_supported(struct brw_context *brw, - struct gl_renderbuffer *rb) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - mesa_format format = rb->Format; - - /* Many integer formats are promoted to RGBA (like XRGB8888 is), which means - * we would consider them renderable even though we don't have surface - * support for their alpha behavior and don't have the blending unit - * available to fake it like we do for XRGB8888. Force them to being - * unsupported. - */ - if (_mesa_is_format_integer_color(format) && - rb->_BaseFormat != GL_RGBA && - rb->_BaseFormat != GL_RG && - rb->_BaseFormat != GL_RED) - return false; - - /* Under some conditions, MSAA is not supported for formats whose width is - * more than 64 bits. - */ - if (devinfo->ver < 8 && - rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) { - /* Gfx6: MSAA on >64 bit formats is unsupported. */ - if (devinfo->ver <= 6) - return false; - - /* Gfx7: 8x MSAA on >64 bit formats is unsupported. */ - if (rb->NumSamples >= 8) - return false; - } - - return brw->mesa_format_supports_render[format]; -} - -enum isl_format -translate_tex_format(struct brw_context *brw, - mesa_format mesa_format, - GLenum srgb_decode) -{ - struct gl_context *ctx = &brw->ctx; - if (srgb_decode == GL_SKIP_DECODE_EXT) - mesa_format = _mesa_get_srgb_format_linear(mesa_format); - - switch( mesa_format ) { - - case MESA_FORMAT_Z_UNORM16: - return ISL_FORMAT_R16_UNORM; - - case MESA_FORMAT_Z24_UNORM_S8_UINT: - case MESA_FORMAT_Z24_UNORM_X8_UINT: - return ISL_FORMAT_R24_UNORM_X8_TYPELESS; - - case MESA_FORMAT_Z_FLOAT32: - return ISL_FORMAT_R32_FLOAT; - - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS; - - case MESA_FORMAT_RGBA_FLOAT32: - /* The value of this ISL surface format is 0, which tricks the - * assertion below. 
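
One more note before the ASTC cases below: full-profile (HDR-capable) decode
is selected by ORing a single bit into the LDR surface-format value, roughly:

   enum isl_format fmt = brw_isl_format_for_mesa_format(mesa_format);

   /* sRGB ASTC is incompatible with full-profile decode, so only the
    * non-sRGB (FLT16) variants ever reach this OR. */
   if (ctx->Extensions.KHR_texture_compression_astc_hdr)
      fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;
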
- */
-      return ISL_FORMAT_R32G32B32A32_FLOAT;
-
-   case MESA_FORMAT_RGBA_ASTC_4x4:
-   case MESA_FORMAT_RGBA_ASTC_5x4:
-   case MESA_FORMAT_RGBA_ASTC_5x5:
-   case MESA_FORMAT_RGBA_ASTC_6x5:
-   case MESA_FORMAT_RGBA_ASTC_6x6:
-   case MESA_FORMAT_RGBA_ASTC_8x5:
-   case MESA_FORMAT_RGBA_ASTC_8x6:
-   case MESA_FORMAT_RGBA_ASTC_8x8:
-   case MESA_FORMAT_RGBA_ASTC_10x5:
-   case MESA_FORMAT_RGBA_ASTC_10x6:
-   case MESA_FORMAT_RGBA_ASTC_10x8:
-   case MESA_FORMAT_RGBA_ASTC_10x10:
-   case MESA_FORMAT_RGBA_ASTC_12x10:
-   case MESA_FORMAT_RGBA_ASTC_12x12: {
-      enum isl_format isl_fmt =
-         brw_isl_format_for_mesa_format(mesa_format);
-
-      /**
-       * It is possible to process these formats using the LDR Profile
-       * or the Full Profile mode of the hardware.  Because it isn't
-       * possible to determine if an HDR or LDR texture is being rendered,
-       * we can't determine which mode to enable in the hardware.
-       * Therefore, to handle all cases, always default to Full profile
-       * unless we are processing sRGBs, which are incompatible with this
-       * mode.
-       */
-      if (ctx->Extensions.KHR_texture_compression_astc_hdr)
-         isl_fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;
-
-      return isl_fmt;
-   }
-
-   default:
-      return brw_isl_format_for_mesa_format(mesa_format);
-   }
-}
-
-/**
- * Convert a MESA_FORMAT to the corresponding BRW_DEPTHFORMAT enum.
- */
-uint32_t
-brw_depth_format(struct brw_context *brw, mesa_format format)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   switch (format) {
-   case MESA_FORMAT_Z_UNORM16:
-      return BRW_DEPTHFORMAT_D16_UNORM;
-   case MESA_FORMAT_Z_FLOAT32:
-      return BRW_DEPTHFORMAT_D32_FLOAT;
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      if (devinfo->ver >= 6) {
-         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
-      } else {
-         /* Use D24_UNORM_S8, not D24_UNORM_X8.
-          *
-          * D24_UNORM_X8 was not introduced until Gfx5.  (See the Ironlake
-          * PRM, Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer
-          * State", Bits 3DSTATE_DEPTH_BUFFER.Surface_Format).
-          *
-          * However, on Gfx5, D24_UNORM_X8 may be used only if separate
-          * stencil is enabled, and we never enable it.  From the Ironlake
-          * PRM, same section as above, 3DSTATE_DEPTH_BUFFER's
-          * "Separate Stencil Buffer Enable" bit:
-          *
-          *    "If this field is disabled, the Surface Format of the depth
-          *     buffer cannot be D24_UNORM_X8_UINT."
-          */
-         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-      }
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
-   default:
-      unreachable("Unexpected depth format.");
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_sync.c b/src/mesa/drivers/dri/i965/brw_sync.c
deleted file mode 100644
index 97cb9e7..0000000
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ /dev/null
@@ -1,642 +0,0 @@
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-/**
- * \file
- * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
- *
- * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping
- * a reference on it.  We can then check for completion or wait for completion
- * using the normal buffer object mechanisms.  This does mean that if an
- * application is using many sync objects, it will emit small batchbuffers
- * which may end up being a significant overhead.  In other tests of removing
- * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a
- * significant performance bottleneck, though.
- */
-
-#include <libsync.h> /* Requires Android or libdrm-2.4.72 */
-
-#include "util/os_file.h"
-#include "util/u_memory.h"
-#include <xf86drm.h>
-
-#include "brw_context.h"
-#include "brw_batch.h"
-#include "mesa/main/externalobjects.h"
-
-struct brw_fence {
-   struct brw_context *brw;
-
-   enum brw_fence_type {
-      /** The fence waits for completion of brw_fence::batch_bo. */
-      BRW_FENCE_TYPE_BO_WAIT,
-
-      /** The fence waits for brw_fence::sync_fd to signal. */
-      BRW_FENCE_TYPE_SYNC_FD,
-   } type;
-
-   union {
-      struct brw_bo *batch_bo;
-
-      /* This struct owns the fd. */
-      int sync_fd;
-   };
-
-   mtx_t mutex;
-   bool signalled;
-};
-
-struct brw_gl_sync {
-   struct gl_sync_object gl;
-   struct brw_fence fence;
-};
-
-struct intel_semaphore_object {
-   struct gl_semaphore_object Base;
-   struct drm_syncobj_handle *syncobj;
-};
-
-static inline struct intel_semaphore_object *
-intel_semaphore_object(struct gl_semaphore_object *sem_obj) {
-   return (struct intel_semaphore_object*) sem_obj;
-}
-
-static struct gl_semaphore_object *
-intel_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
-{
-   struct intel_semaphore_object *is_obj = CALLOC_STRUCT(intel_semaphore_object);
-   if (!is_obj)
-      return NULL;
-
-   _mesa_initialize_semaphore_object(ctx, &is_obj->Base, name);
-   return &is_obj->Base;
-}
-
-static void
-intel_semaphoreobj_free(struct gl_context *ctx,
-                        struct gl_semaphore_object *semObj)
-{
-   _mesa_delete_semaphore_object(ctx, semObj);
-}
-
-static void
-intel_semaphoreobj_import(struct gl_context *ctx,
-                          struct gl_semaphore_object *semObj,
-                          int fd)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_screen *screen = brw->screen;
-   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
-   iSemObj->syncobj = CALLOC_STRUCT(drm_syncobj_handle);
-   iSemObj->syncobj->fd = fd;
-
-   if (drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, iSemObj->syncobj) < 0) {
-      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
-              strerror(errno));
-      free(iSemObj->syncobj);
-   }
-}
-
-static void
-intel_semaphoreobj_signal(struct gl_context *ctx,
-                          struct gl_semaphore_object *semObj,
-                          GLuint numBufferBarriers,
-                          struct gl_buffer_object **bufObjs,
-                          GLuint numTextureBarriers,
-                          struct gl_texture_object **texObjs,
-                          const GLenum *dstLayouts)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
-   struct drm_i915_gem_exec_fence *fence =
-      util_dynarray_grow(&brw->batch.exec_fences,
-                         struct drm_i915_gem_exec_fence, 1);
-   fence->flags = I915_EXEC_FENCE_SIGNAL;
-   fence->handle = iSemObj->syncobj->handle;
-   brw->batch.contains_fence_signal = true;
-}
-
-static void
-intel_semaphoreobj_wait(struct gl_context *ctx,
-                        struct gl_semaphore_object *semObj,
-                        GLuint numBufferBarriers,
-                        struct gl_buffer_object **bufObjs,
-                        GLuint numTextureBarriers,
-                        struct gl_texture_object **texObjs,
-                        const GLenum *srcLayouts)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_screen *screen = brw->screen;
-   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
-   struct drm_syncobj_wait args = {
-      .handles = (uintptr_t)&iSemObj->syncobj->handle,
-      .count_handles = 1,
-   };
-
-   drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
-}
-
-static void
-brw_fence_init(struct brw_context *brw, struct brw_fence *fence,
-               enum brw_fence_type type)
-{
-   fence->brw = brw;
-   fence->type = type;
-   mtx_init(&fence->mutex, mtx_plain);
-
-   switch (type) {
-   case BRW_FENCE_TYPE_BO_WAIT:
-      fence->batch_bo = NULL;
-      break;
-   case BRW_FENCE_TYPE_SYNC_FD:
-      fence->sync_fd = -1;
-      break;
-   }
-}
-
-static void
-brw_fence_finish(struct brw_fence *fence)
-{
-   switch (fence->type) {
-   case BRW_FENCE_TYPE_BO_WAIT:
-      if (fence->batch_bo)
-         brw_bo_unreference(fence->batch_bo);
-      break;
-   case BRW_FENCE_TYPE_SYNC_FD:
-      if (fence->sync_fd != -1)
-         close(fence->sync_fd);
-      break;
-   }
-
-   mtx_destroy(&fence->mutex);
-}
-
-static bool MUST_CHECK
-brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
-{
-   __DRIcontext *driContext = brw->driContext;
-   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
-
-   /*
-    * From KHR_fence_sync:
-    *
-    *   When the condition of the sync object is satisfied by the fence
-    *   command, the sync is signaled by the associated client API context,
-    *   causing any eglClientWaitSyncKHR commands (see below) blocking on
-    *   <sync> to unblock. The only condition currently supported is
-    *   EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
-    *   completion of the fence command corresponding to the sync object,
-    *   and all preceding commands in the associated client API context's
-    *   command stream. The sync object will not be signaled until all
-    *   effects from these commands on the client API's internal and
-    *   framebuffer state are fully realized. No other state is affected by
-    *   execution of the fence command.
-    *
-    * Note the emphasis there on ensuring that the framebuffer is fully
-    * realized before the fence is signaled.  We cannot just flush the batch,
-    * but must also resolve the drawable first.  The importance of this is,
-    * for example, in creating a fence for a frame to be passed to a
-    * remote compositor.  Without us flushing the drawable explicitly, the
-    * resolve will be in a following batch (when the client finally calls
-    * SwapBuffers, or triggers a resolve via some other path) and so the
-    * compositor may read the incomplete framebuffer instead.
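
For context, this is the client-side API the path ultimately backs (plain
GL_ARB_sync usage, not driver code):

   GLsync s = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
   glFlush(); /* make sure the fence command reaches the GPU */

   switch (glClientWaitSync(s, 0, 16000000 /* 16 ms, in ns */)) {
   case GL_ALREADY_SIGNALED:
   case GL_CONDITION_SATISFIED:
      /* all GL work prior to the fence has completed */
      break;
   case GL_TIMEOUT_EXPIRED:
      /* still busy; poll again or do other work */
      break;
   default: /* GL_WAIT_FAILED */
      break;
   }
   glDeleteSync(s);
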
- */ - if (driDrawable) - brw_resolve_for_dri2_flush(brw, driDrawable); - brw_emit_mi_flush(brw); - - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - assert(!fence->batch_bo); - assert(!fence->signalled); - - fence->batch_bo = brw->batch.batch.bo; - brw_bo_reference(fence->batch_bo); - - if (brw_batch_flush(brw) < 0) { - brw_bo_unreference(fence->batch_bo); - fence->batch_bo = NULL; - return false; - } - break; - case BRW_FENCE_TYPE_SYNC_FD: - assert(!fence->signalled); - - if (fence->sync_fd == -1) { - /* Create an out-fence that signals after all pending commands - * complete. - */ - if (brw_batch_flush_fence(brw, -1, &fence->sync_fd) < 0) - return false; - assert(fence->sync_fd != -1); - } else { - /* Wait on the in-fence before executing any subsequently submitted - * commands. - */ - if (brw_batch_flush(brw) < 0) - return false; - - /* Emit a dummy batch just for the fence. */ - brw_emit_mi_flush(brw); - if (brw_batch_flush_fence(brw, fence->sync_fd, NULL) < 0) - return false; - } - break; - } - - return true; -} - -static bool MUST_CHECK -brw_fence_insert(struct brw_context *brw, struct brw_fence *fence) -{ - bool ret; - - mtx_lock(&fence->mutex); - ret = brw_fence_insert_locked(brw, fence); - mtx_unlock(&fence->mutex); - - return ret; -} - -static bool -brw_fence_has_completed_locked(struct brw_fence *fence) -{ - if (fence->signalled) - return true; - - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - if (!fence->batch_bo) { - /* There may be no batch if brw_batch_flush() failed. */ - return false; - } - - if (brw_bo_busy(fence->batch_bo)) - return false; - - brw_bo_unreference(fence->batch_bo); - fence->batch_bo = NULL; - fence->signalled = true; - - return true; - - case BRW_FENCE_TYPE_SYNC_FD: - assert(fence->sync_fd != -1); - - if (sync_wait(fence->sync_fd, 0) == -1) - return false; - - fence->signalled = true; - - return true; - } - - return false; -} - -static bool -brw_fence_has_completed(struct brw_fence *fence) -{ - bool ret; - - mtx_lock(&fence->mutex); - ret = brw_fence_has_completed_locked(fence); - mtx_unlock(&fence->mutex); - - return ret; -} - -static bool -brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence, - uint64_t timeout) -{ - int32_t timeout_i32; - - if (fence->signalled) - return true; - - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - if (!fence->batch_bo) { - /* There may be no batch if brw_batch_flush() failed. */ - return false; - } - - /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns - * immediately for timeouts <= 0. The best we can do is to clamp the - * timeout to INT64_MAX. This limits the maximum timeout from 584 years to - * 292 years - likely not a big deal. - */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - if (brw_bo_wait(fence->batch_bo, timeout) != 0) - return false; - - fence->signalled = true; - brw_bo_unreference(fence->batch_bo); - fence->batch_bo = NULL; - - return true; - case BRW_FENCE_TYPE_SYNC_FD: - if (fence->sync_fd == -1) - return false; - - if (timeout > INT32_MAX) - timeout_i32 = -1; - else - timeout_i32 = timeout; - - if (sync_wait(fence->sync_fd, timeout_i32) == -1) - return false; - - fence->signalled = true; - return true; - } - - assert(!"bad enum brw_fence_type"); - return false; -} - -/** - * Return true if the function successfully signals or has already signalled. - * (This matches the behavior expected from __DRI2fence::client_wait_sync). 
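
A note on the timeout handling above: three integer domains meet in
brw_fence_client_wait_locked(), and each kernel interface clamps differently.
Condensed:

   uint64_t timeout; /* API side: unsigned 64-bit nanoseconds */

   /* DRM_IOCTL_I915_GEM_WAIT takes a signed 64-bit count and returns
    * immediately for values <= 0, so clamp rather than wrap. */
   int64_t bo_timeout = timeout > INT64_MAX ? INT64_MAX : (int64_t)timeout;

   /* sync_wait() takes a signed 32-bit count where -1 means "forever". */
   int32_t fd_timeout = timeout > INT32_MAX ? -1 : (int32_t)timeout;
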
- */ -static bool -brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, - uint64_t timeout) -{ - bool ret; - - mtx_lock(&fence->mutex); - ret = brw_fence_client_wait_locked(brw, fence, timeout); - mtx_unlock(&fence->mutex); - - return ret; -} - -static void -brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence) -{ - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - /* We have nothing to do for WaitSync. Our GL command stream is sequential, - * so given that the sync object has already flushed the batchbuffer, any - * batchbuffers coming after this waitsync will naturally not occur until - * the previous one is done. - */ - break; - case BRW_FENCE_TYPE_SYNC_FD: - assert(fence->sync_fd != -1); - - /* The user wants explicit synchronization, so give them what they want. */ - if (!brw_fence_insert(brw, fence)) { - /* FIXME: There exists no way yet to report an error here. If an error - * occurs, continue silently and hope for the best. - */ - } - break; - } -} - -static struct gl_sync_object * -brw_gl_new_sync(struct gl_context *ctx) -{ - struct brw_gl_sync *sync; - - sync = calloc(1, sizeof(*sync)); - if (!sync) - return NULL; - - return &sync->gl; -} - -static void -brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync) -{ - struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync; - - brw_fence_finish(&sync->fence); - free(sync->gl.Label); - free(sync); -} - -static void -brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync, - GLenum condition, GLbitfield flags) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync; - - /* brw_fence_insert_locked() assumes it must do a complete flush */ - assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); - - brw_fence_init(brw, &sync->fence, BRW_FENCE_TYPE_BO_WAIT); - - if (!brw_fence_insert_locked(brw, &sync->fence)) { - /* FIXME: There exists no way to report a GL error here. If an error - * occurs, continue silently and hope for the best. 
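
These FIXMEs exist because the hooks run inside core Mesa's dispatch with no
error-reporting channel: the driver registers them through the device-driver
function table, as brw_init_syncobj_functions() below shows. Trimmed to its
shape (comments added for illustration):

   /* Core Mesa calls through per-context driver hooks; unset hooks fall
    * back to core defaults. */
   functions->NewSyncObject  = brw_gl_new_sync;        /* glFenceSync alloc */
   functions->FenceSync      = brw_gl_fence_sync;      /* glFenceSync */
   functions->ClientWaitSync = brw_gl_client_wait_sync; /* glClientWaitSync */
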
- */
-   }
-}
-
-static void
-brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
-                        GLbitfield flags, GLuint64 timeout)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
-   if (brw_fence_client_wait(brw, &sync->fence, timeout))
-      sync->gl.StatusFlag = 1;
-}
-
-static void
-brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
-                        GLbitfield flags, GLuint64 timeout)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
-   brw_fence_server_wait(brw, &sync->fence);
-}
-
-static void
-brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
-{
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
-   if (brw_fence_has_completed(&sync->fence))
-      sync->gl.StatusFlag = 1;
-}
-
-void
-brw_init_syncobj_functions(struct dd_function_table *functions)
-{
-   functions->NewSyncObject = brw_gl_new_sync;
-   functions->DeleteSyncObject = brw_gl_delete_sync;
-   functions->FenceSync = brw_gl_fence_sync;
-   functions->CheckSync = brw_gl_check_sync;
-   functions->ClientWaitSync = brw_gl_client_wait_sync;
-   functions->ServerWaitSync = brw_gl_server_wait_sync;
-   functions->NewSemaphoreObject = intel_semaphoreobj_alloc;
-   functions->DeleteSemaphoreObject = intel_semaphoreobj_free;
-   functions->ImportSemaphoreFd = intel_semaphoreobj_import;
-   functions->ServerSignalSemaphoreObject = intel_semaphoreobj_signal;
-   functions->ServerWaitSemaphoreObject = intel_semaphoreobj_wait;
-}
-
-static void *
-brw_dri_create_fence(__DRIcontext *ctx)
-{
-   struct brw_context *brw = ctx->driverPrivate;
-   struct brw_fence *fence;
-
-   fence = calloc(1, sizeof(*fence));
-   if (!fence)
-      return NULL;
-
-   brw_fence_init(brw, fence, BRW_FENCE_TYPE_BO_WAIT);
-
-   if (!brw_fence_insert_locked(brw, fence)) {
-      brw_fence_finish(fence);
-      free(fence);
-      return NULL;
-   }
-
-   return fence;
-}
-
-static void
-brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
-{
-   struct brw_fence *fence = _fence;
-
-   brw_fence_finish(fence);
-   free(fence);
-}
-
-static GLboolean
-brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
-                         uint64_t timeout)
-{
-   struct brw_fence *fence = _fence;
-
-   return brw_fence_client_wait(fence->brw, fence, timeout);
-}
-
-static void
-brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
-{
-   struct brw_fence *fence = _fence;
-
-   /* We might be called here with a NULL fence as a result of WaitSyncKHR
-    * on an EGL_KHR_reusable_sync fence.  Nothing to do here in that case.
-    */
-   if (!fence)
-      return;
-
-   brw_fence_server_wait(fence->brw, fence);
-}
-
-static unsigned
-brw_dri_get_capabilities(__DRIscreen *dri_screen)
-{
-   struct brw_screen *screen = dri_screen->driverPrivate;
-   unsigned caps = 0;
-
-   if (screen->has_exec_fence)
-      caps |= __DRI_FENCE_CAP_NATIVE_FD;
-
-   return caps;
-}
-
-static void *
-brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
-{
-   struct brw_context *brw = dri_ctx->driverPrivate;
-   struct brw_fence *fence;
-
-   assert(brw->screen->has_exec_fence);
-
-   fence = calloc(1, sizeof(*fence));
-   if (!fence)
-      return NULL;
-
-   brw_fence_init(brw, fence, BRW_FENCE_TYPE_SYNC_FD);
-
-   if (fd == -1) {
-      /* Create an out-fence fd */
-      if (!brw_fence_insert_locked(brw, fence))
-         goto fail;
-   } else {
-      /* Import the sync fd as an in-fence.
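
The sync-fd half of this file backs EGL_ANDROID_native_fence_sync; the
client-side counterpart looks like this (standard EGL, error handling
omitted):

   EGLSyncKHR sync =
      eglCreateSyncKHR(dpy, EGL_SYNC_NATIVE_FENCE_ANDROID, NULL);
   glFlush(); /* submit the work the fence should cover */

   /* Export an out-fence fd; every consumer dups its own copy, which is
    * why the import path here calls os_dupfd_cloexec(). */
   int fd = eglDupNativeFenceFDANDROID(dpy, sync);
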
*/ - fence->sync_fd = os_dupfd_cloexec(fd); - } - - assert(fence->sync_fd != -1); - - return fence; - -fail: - brw_fence_finish(fence); - free(fence); - return NULL; -} - -static int -brw_dri_get_fence_fd_locked(struct brw_fence *fence) -{ - assert(fence->type == BRW_FENCE_TYPE_SYNC_FD); - return os_dupfd_cloexec(fence->sync_fd); -} - -static int -brw_dri_get_fence_fd(__DRIscreen *dri_screen, void *_fence) -{ - struct brw_fence *fence = _fence; - int fd; - - mtx_lock(&fence->mutex); - fd = brw_dri_get_fence_fd_locked(fence); - mtx_unlock(&fence->mutex); - - return fd; -} - -const __DRI2fenceExtension brwFenceExtension = { - .base = { __DRI2_FENCE, 2 }, - - .create_fence = brw_dri_create_fence, - .destroy_fence = brw_dri_destroy_fence, - .client_wait_sync = brw_dri_client_wait_sync, - .server_wait_sync = brw_dri_server_wait_sync, - .get_fence_from_cl_event = NULL, - .get_capabilities = brw_dri_get_capabilities, - .create_fence_fd = brw_dri_create_fence_fd, - .get_fence_fd = brw_dri_get_fence_fd, -}; diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c deleted file mode 100644 index 5eaa3ed..0000000 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_tcs.c - * - * Tessellation control shader state upload code. 
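
A road map for the file below: every shader stage uploads through the same
three-tier lookup before compiling anything. Condensed from
brw_upload_tcs_prog() further down:

   brw_tcs_populate_key(brw, &key);

   /* 1. in-memory program cache */
   if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG,
                        &key, sizeof(key), &stage_state->prog_offset,
                        &brw->tcs.base.prog_data, true))
      return;

   /* 2. on-disk shader cache */
   if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL))
      return;

   /* 3. compile from NIR and add the result to the cache */
   brw_codegen_tcs_prog(brw, tcp, tep, &key);
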
- */ - -#include "brw_context.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" -#include "brw_state.h" -#include "program/prog_parameter.h" -#include "nir_builder.h" - -static bool -brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, - struct brw_program *tep, struct brw_tcs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - const struct brw_compiler *compiler = brw->screen->compiler; - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_stage_state *stage_state = &brw->tcs.base; - nir_shader *nir; - struct brw_tcs_prog_data prog_data; - bool start_busy = false; - double start_time = 0; - - void *mem_ctx = ralloc_context(NULL); - if (tcp) { - nir = nir_shader_clone(mem_ctx, tcp->program.nir); - } else { - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions; - nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key); - } - - memset(&prog_data, 0, sizeof(prog_data)); - - if (tcp) { - brw_assign_common_binding_table_offsets(devinfo, &tcp->program, - &prog_data.base.base, 0); - - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program, - &prog_data.base.base, - compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(compiler, nir, NULL, - prog_data.base.base.ubo_ranges); - } - } else { - /* Upload the Patch URB Header as the first two uniforms. - * Do the annoying scrambling so the shader doesn't have to. - */ - assert(nir->num_uniforms == 32); - prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8); - prog_data.base.base.nr_params = 8; - - uint32_t *param = prog_data.base.base.param; - for (int i = 0; i < 8; i++) - param[i] = BRW_PARAM_BUILTIN_ZERO; - - if (key->tes_primitive_mode == GL_QUADS) { - for (int i = 0; i < 4; i++) - param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - - param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; - param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y; - } else if (key->tes_primitive_mode == GL_TRIANGLES) { - for (int i = 0; i < 3; i++) - param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - - param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; - } else { - assert(key->tes_primitive_mode == GL_ISOLINES); - param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y; - param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; - } - } - - int st_index = -1; - if (INTEL_DEBUG(DEBUG_SHADER_TIME) && tep) - st_index = brw_get_shader_time_index(brw, &tep->program, ST_TCS, true); - - if (unlikely(brw->perf_debug)) { - start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); - start_time = get_time(); - } - - char *error_str; - const unsigned *program = - brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index, - NULL, &error_str); - if (program == NULL) { - if (tep) { - tep->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&tep->program.sh.data->InfoLog, error_str); - } - - _mesa_problem(NULL, "Failed to compile tessellation control shader: " - "%s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (tcp) { - if (tcp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_TESS_CTRL, tcp->program.Id, - &key->base); - } - tcp->compiled_once = true; - } - - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("TCS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, 
stage_state, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG, - key, sizeof(*key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &stage_state->prog_offset, &brw->tcs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -void -brw_tcs_populate_key(struct brw_context *brw, - struct brw_tcs_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_compiler *compiler = brw->screen->compiler; - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - struct gl_program *tes_prog = &tep->program; - - uint64_t per_vertex_slots = tes_prog->info.inputs_read; - uint32_t per_patch_slots = tes_prog->info.patch_inputs_read; - - memset(key, 0, sizeof(*key)); - - if (tcp) { - struct gl_program *prog = &tcp->program; - per_vertex_slots |= prog->info.outputs_written; - per_patch_slots |= prog->info.patch_outputs_written; - } - - if (devinfo->ver < 8 || !tcp || compiler->use_tcs_8_patch) - key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices; - key->outputs_written = per_vertex_slots; - key->patch_outputs_written = per_patch_slots; - - /* We need to specialize our code generation for tessellation levels - * based on the domain the DS is expecting to tessellate. - */ - key->tes_primitive_mode = tep->program.info.tess.primitive_mode; - key->quads_workaround = devinfo->ver < 9 && - tep->program.info.tess.primitive_mode == GL_QUADS && - tep->program.info.tess.spacing == TESS_SPACING_EQUAL; - - if (tcp) { - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(&brw->ctx, tcp, &key->base); - } -} - -void -brw_upload_tcs_prog(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tcs.base; - struct brw_tcs_prog_key key; - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - ASSERTED struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - assert(tep); - - if (!brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_PATCH_PRIMITIVE | - BRW_NEW_TESS_PROGRAMS)) - return; - - brw_tcs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG, &key, sizeof(key), - &stage_state->prog_offset, &brw->tcs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL)) - return; - - tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - if (tcp) - tcp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_tcs_prog(brw, tcp, tep, &key); - assert(success); -} - -void -brw_tcs_populate_default_key(const struct brw_compiler *compiler, - struct brw_tcs_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_program *btcp = brw_program(prog); - const struct gl_linked_shader *tes = - sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, btcp, &key->base); - - /* Guess that the input and output patches have the same dimensionality. 
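 *
 * (Both key-population helpers in this file memset() the key before filling
 * it in. That is load-bearing: the program cache hashes and compares keys
 * as raw bytes, so compiler-inserted padding must be zeroed for two
 * logically equal keys to match. A reduced sketch of the comparison, with
 * invented names, assuming <string.h> and <stdbool.h>:
 *
 *    struct example_key { uint8_t flag; uint64_t outputs_written; };
 *
 *    static bool
 *    key_equals(const struct example_key *a, const struct example_key *b)
 *    {
 *       // Safe only because every key was zero-initialized first;
 *       // otherwise the padding after 'flag' would compare garbage.
 *       return memcmp(a, b, sizeof(*a)) == 0;
 *    }
 * )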
*/ - if (devinfo->ver < 8 || compiler->use_tcs_8_patch) - key->input_vertices = prog->info.tess.tcs_vertices_out; - - if (tes) { - key->tes_primitive_mode = tes->Program->info.tess.primitive_mode; - key->quads_workaround = devinfo->ver < 9 && - tes->Program->info.tess.primitive_mode == GL_QUADS && - tes->Program->info.tess.spacing == TESS_SPACING_EQUAL; - } else { - key->tes_primitive_mode = GL_TRIANGLES; - } - - key->outputs_written = prog->nir->info.outputs_written; - key->patch_outputs_written = prog->nir->info.patch_outputs_written; -} - -bool -brw_tcs_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - struct brw_tcs_prog_key key; - uint32_t old_prog_offset = brw->tcs.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->tcs.base.prog_data; - bool success; - - struct brw_program *btcp = brw_program(prog); - const struct gl_linked_shader *tes = - shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; - struct brw_program *btep = tes ? brw_program(tes->Program) : NULL; - - brw_tcs_populate_default_key(compiler, &key, shader_prog, prog); - - success = brw_codegen_tcs_prog(brw, btcp, btep, &key); - - brw->tcs.base.prog_offset = old_prog_offset; - brw->tcs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_tcs_surface_state.c b/src/mesa/drivers/dri/i965/brw_tcs_surface_state.c deleted file mode 100644 index 73179c0..0000000 --- a/src/mesa/drivers/dri/i965/brw_tcs_surface_state.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -#include "brw_context.h" -#include "brw_state.h" - - -/* Creates a new TCS constant buffer reflecting the current TCS program's - * constants, if needed by the TCS program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. 
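 *
 * (brw_tcs_precompile() above shows the save/compile/restore idiom used by
 * every stage's precompile hook: warming the program cache must not disturb
 * the variant currently bound for drawing. Reduced to its shape, with
 * invented names:
 *
 *    struct stage_state { uint32_t prog_offset; void *prog_data; };
 *
 *    static bool
 *    precompile_stage(struct stage_state *st, bool (*codegen)(void *), void *arg)
 *    {
 *       uint32_t saved_offset = st->prog_offset;  // state the draw path reads
 *       void *saved_data = st->prog_data;
 *       bool ok = codegen(arg);                   // fills the cache, clobbers st
 *       st->prog_offset = saved_offset;           // restore the bound variant
 *       st->prog_data = saved_data;
 *       return ok;
 *    }
 * )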
- */ -static void -brw_upload_tcs_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tcs.base; - - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - - if (!tcp) - return; - - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_TCS_CONSTBUF, &tcp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_tcs_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = brw_upload_tcs_pull_constants, -}; - -static void -brw_upload_tcs_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_PROGRAM */ - struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; - - /* BRW_NEW_TCS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - - brw_upload_ubo_surfaces(brw, prog, &brw->tcs.base, prog_data); -} - -const struct brw_tracked_state brw_tcs_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_tcs_ubo_surfaces, -}; - -static void -brw_upload_tcs_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL]; - - if (tcp) { - /* BRW_NEW_TCS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ - brw_upload_image_surfaces(brw, tcp, &brw->tcs.base, - brw->tcs.base.prog_data); - } -} - -const struct brw_tracked_state brw_tcs_image_surfaces = { - .dirty = { - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = brw_upload_tcs_image_surfaces, -}; diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c deleted file mode 100644 index 19114c5..0000000 --- a/src/mesa/drivers/dri/i965/brw_tes.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_tes.c - * - * Tessellation evaluation shader state upload code. 
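 *
 * (The brw_tracked_state atoms in the previous file pair a dirty-bit
 * signature with an emit callback; state upload walks an atom list and
 * re-emits only the atoms whose signature intersects the accumulated dirty
 * bits. A reduced model, with invented names:
 *
 *    struct atom {
 *       uint64_t mesa_bits, brw_bits;   // what this atom depends on
 *       void (*emit)(void *brw);
 *    };
 *
 *    static void
 *    upload_dirty(void *brw, uint64_t mesa, uint64_t brw_dirty,
 *                 const struct atom *atoms, int count)
 *    {
 *       for (int i = 0; i < count; i++) {
 *          if ((atoms[i].mesa_bits & mesa) || (atoms[i].brw_bits & brw_dirty))
 *             atoms[i].emit(brw);       // a dependency changed: re-emit
 *       }
 *    }
 * )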
- */ - -#include "brw_context.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" -#include "brw_state.h" -#include "program/prog_parameter.h" - -static bool -brw_codegen_tes_prog(struct brw_context *brw, - struct brw_program *tep, - struct brw_tes_prog_key *key) -{ - const struct brw_compiler *compiler = brw->screen->compiler; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_stage_state *stage_state = &brw->tes.base; - struct brw_tes_prog_data prog_data; - bool start_busy = false; - double start_time = 0; - - memset(&prog_data, 0, sizeof(prog_data)); - - void *mem_ctx = ralloc_context(NULL); - - nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir); - - brw_assign_common_binding_table_offsets(devinfo, &tep->program, - &prog_data.base.base, 0); - - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, - &prog_data.base.base, - compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(compiler, nir, NULL, - prog_data.base.base.ubo_ranges); - } - - int st_index = -1; - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) - st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true); - - if (unlikely(brw->perf_debug)) { - start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); - start_time = get_time(); - } - - struct brw_vue_map input_vue_map; - brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, - key->patch_inputs_read); - - char *error_str; - const unsigned *program = - brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data, - nir, st_index, NULL, &error_str); - if (program == NULL) { - tep->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&tep->program.sh.data->InfoLog, error_str); - - _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: " - "%s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (tep->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id, - &key->base); - } - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("TES compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - tep->compiled_once = true; - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, stage_state, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, - key, sizeof(*key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &stage_state->prog_offset, &brw->tes.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -void -brw_tes_populate_key(struct brw_context *brw, - struct brw_tes_prog_key *key) -{ - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - struct gl_program *prog = &tep->program; - - uint64_t per_vertex_slots = prog->info.inputs_read; - uint32_t per_patch_slots = prog->info.patch_inputs_read; - - memset(key, 0, sizeof(*key)); - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(&brw->ctx, tep, &key->base); - - /* The TCS may have additional outputs which aren't read by the - * TES (possibly for cross-thread communication). These need to - * be stored in the Patch URB Entry as well. 
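 *
 * (The combination done just below is the usual slot-bitmask idiom: OR the
 * consumer's reads with the producer's writes, while carving out the
 * tessellation levels, which live in the patch header rather than in
 * generic slots. A sketch with illustrative bit assignments:
 *
 *    #define SLOT_TESS_LEVEL_INNER (1ull << 0)
 *    #define SLOT_TESS_LEVEL_OUTER (1ull << 1)
 *
 *    static uint64_t
 *    combined_per_vertex_slots(uint64_t tes_reads, uint64_t tcs_writes)
 *    {
 *       // Tess levels must not inflate the per-vertex URB footprint.
 *       return tes_reads |
 *              (tcs_writes & ~(SLOT_TESS_LEVEL_INNER | SLOT_TESS_LEVEL_OUTER));
 *    }
 * )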
- */ - if (tcp) { - struct gl_program *tcp_prog = &tcp->program; - per_vertex_slots |= tcp_prog->info.outputs_written & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - per_patch_slots |= tcp_prog->info.patch_outputs_written; - } - - key->inputs_read = per_vertex_slots; - key->patch_inputs_read = per_patch_slots; -} - -void -brw_upload_tes_prog(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tes.base; - struct brw_tes_prog_key key; - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - - if (!brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_TESS_PROGRAMS)) - return; - - brw_tes_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, &key, sizeof(key), - &stage_state->prog_offset, &brw->tes.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_EVAL)) - return; - - tep = (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - tep->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_tes_prog(brw, tep, &key); - assert(success); -} - -void -brw_tes_populate_default_key(const struct brw_compiler *compiler, - struct brw_tes_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_program *btep = brw_program(prog); - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, btep, &key->base); - - key->inputs_read = prog->nir->info.inputs_read; - key->patch_inputs_read = prog->nir->info.patch_inputs_read; - - if (sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) { - struct gl_program *tcp = - sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program; - key->inputs_read |= tcp->nir->info.outputs_written & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - key->patch_inputs_read |= tcp->nir->info.patch_outputs_written; - } -} - -bool -brw_tes_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - struct brw_tes_prog_key key; - uint32_t old_prog_offset = brw->tes.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->tes.base.prog_data; - bool success; - - struct brw_program *btep = brw_program(prog); - - brw_tes_populate_default_key(compiler, &key, shader_prog, prog); - - success = brw_codegen_tes_prog(brw, btep, &key); - - brw->tes.base.prog_offset = old_prog_offset; - brw->tes.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_tes_surface_state.c b/src/mesa/drivers/dri/i965/brw_tes_surface_state.c deleted file mode 100644 index 6e9e58a..0000000 --- a/src/mesa/drivers/dri/i965/brw_tes_surface_state.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in 
all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -#include "brw_context.h" -#include "brw_state.h" - - -/* Creates a new TES constant buffer reflecting the current TES program's - * constants, if needed by the TES program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. - */ -static void -brw_upload_tes_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tes.base; - - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *dp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - - if (!dp) - return; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_TES_CONSTBUF, &dp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_tes_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = brw_upload_tes_pull_constants, -}; - -static void -brw_upload_tes_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_PROGRAM */ - struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; - - /* BRW_NEW_TES_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - - brw_upload_ubo_surfaces(brw, prog, &brw->tes.base, prog_data); -} - -const struct brw_tracked_state brw_tes_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_tes_ubo_surfaces, -}; - -static void -brw_upload_tes_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL]; - - if (tep) { - /* BRW_NEW_TES_PROG_DATA, BRW_NEW_IMAGE_UNITS */ - brw_upload_image_surfaces(brw, tep, &brw->tes.base, - brw->tes.base.prog_data); - } -} - -const struct brw_tracked_state brw_tes_image_surfaces = { - .dirty = { - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TES_PROG_DATA, - }, - .emit = brw_upload_tes_image_surfaces, -}; diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c deleted file mode 100644 index cbb4cd2..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ /dev/null @@ -1,415 +0,0 @@ -#include "swrast/swrast.h" -#include "main/renderbuffer.h" -#include "main/texobj.h" -#include "main/teximage.h" -#include "main/mipmap.h" -#include "drivers/common/meta.h" -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_buffer_objects.h" -#include "brw_mipmap_tree.h" -#include "brw_tex.h" -#include "brw_fbo.h" -#include "brw_state.h" -#include "util/u_memory.h" - -#define FILE_DEBUG_FLAG 
DEBUG_TEXTURE - -static struct gl_texture_image * -brw_new_texture_image(struct gl_context *ctx) -{ - DBG("%s\n", __func__); - (void) ctx; - return (struct gl_texture_image *) CALLOC_STRUCT(brw_texture_image); -} - -static void -brw_delete_texture_image(struct gl_context *ctx, struct gl_texture_image *img) -{ - /* nothing special (yet) for brw_texture_image */ - _mesa_delete_texture_image(ctx, img); -} - - -static struct gl_texture_object * -brw_new_texture_object(struct gl_context *ctx, GLuint name, GLenum target) -{ - struct brw_texture_object *obj = CALLOC_STRUCT(brw_texture_object); - - (void) ctx; - - DBG("%s\n", __func__); - - if (obj == NULL) - return NULL; - - _mesa_initialize_texture_object(ctx, &obj->base, name, target); - - obj->needs_validate = true; - - return &obj->base; -} - -static void -brw_delete_texture_object(struct gl_context *ctx, - struct gl_texture_object *texObj) -{ - struct brw_texture_object *brw_obj = brw_texture_object(texObj); - - brw_miptree_release(&brw_obj->mt); - _mesa_delete_texture_object(ctx, texObj); -} - -static GLboolean -brw_alloc_texture_image_buffer(struct gl_context *ctx, - struct gl_texture_image *image) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_image *intel_image = brw_texture_image(image); - struct gl_texture_object *texobj = image->TexObject; - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - - assert(image->Border == 0); - - /* Quantize sample count */ - if (image->NumSamples) { - image->NumSamples = brw_quantize_num_samples(brw->screen, image->NumSamples); - if (!image->NumSamples) - return false; - } - - /* Because the driver uses AllocTextureImageBuffer() internally, it may end - * up mismatched with FreeTextureImageBuffer(), but that is safe to call - * multiple times. - */ - ctx->Driver.FreeTextureImageBuffer(ctx, image); - - if (!_swrast_init_texture_image(image)) - return false; - - if (intel_texobj->mt && - brw_miptree_match_image(intel_texobj->mt, image)) { - brw_miptree_reference(&intel_image->mt, intel_texobj->mt); - DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n", - __func__, texobj, image->Level, - image->Width, image->Height, image->Depth, intel_texobj->mt); - } else { - intel_image->mt = brw_miptree_create_for_teximage(brw, intel_texobj, - intel_image, - MIPTREE_CREATE_DEFAULT); - if (!intel_image->mt) - return false; - - /* Even if the object currently has a mipmap tree associated - * with it, this one is a more likely candidate to represent the - * whole object since our level didn't fit what was there - * before, and any lower levels would fit into our miptree. - */ - brw_miptree_reference(&intel_texobj->mt, intel_image->mt); - - DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n", - __func__, texobj, image->Level, - image->Width, image->Height, image->Depth, intel_image->mt); - } - - intel_texobj->needs_validate = true; - - return true; -} - -/** - * ctx->Driver.AllocTextureStorage() handler. - * - * Compare this to _mesa_AllocTextureStorage_sw, which would call into - * brw_alloc_texture_image_buffer() above. 
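 *
 * (brw_miptree_reference()/brw_miptree_release(), used throughout this
 * file, implement shared ownership: each image level ends up pointing at
 * the same miptree as its texture object whenever they match. A reduced
 * refcount sketch, with invented names, assuming <stdlib.h>:
 *
 *    struct mt { int refcount; };
 *
 *    static void
 *    mt_release(struct mt **dst)
 *    {
 *       if (*dst && --(*dst)->refcount == 0)
 *          free(*dst);              // last reference frees the storage
 *       *dst = NULL;
 *    }
 *
 *    static void
 *    mt_reference(struct mt **dst, struct mt *src)
 *    {
 *       if (*dst == src)
 *          return;
 *       mt_release(dst);            // drop whatever we pointed at before
 *       if (src) {
 *          src->refcount++;
 *          *dst = src;
 *       }
 *    }
 * )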
- */ -static GLboolean -brw_alloc_texture_storage(struct gl_context *ctx, - struct gl_texture_object *texobj, - GLsizei levels, GLsizei width, - GLsizei height, GLsizei depth) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - struct gl_texture_image *first_image = texobj->Image[0][0]; - int num_samples = brw_quantize_num_samples(brw->screen, - first_image->NumSamples); - const int numFaces = _mesa_num_tex_faces(texobj->Target); - int face; - int level; - - /* If the object's current miptree doesn't match what we need, make a new - * one. - */ - if (!intel_texobj->mt || - !brw_miptree_match_image(intel_texobj->mt, first_image) || - intel_texobj->mt->last_level != levels - 1) { - brw_miptree_release(&intel_texobj->mt); - - brw_get_image_dims(first_image, &width, &height, &depth); - intel_texobj->mt = brw_miptree_create(brw, texobj->Target, - first_image->TexFormat, - 0, levels - 1, - width, height, depth, - MAX2(num_samples, 1), - MIPTREE_CREATE_DEFAULT); - - if (intel_texobj->mt == NULL) { - return false; - } - } - - for (face = 0; face < numFaces; face++) { - for (level = 0; level < levels; level++) { - struct gl_texture_image *image = texobj->Image[face][level]; - struct brw_texture_image *intel_image = brw_texture_image(image); - - image->NumSamples = num_samples; - - _swrast_free_texture_image_buffer(ctx, image); - if (!_swrast_init_texture_image(image)) - return false; - - brw_miptree_reference(&intel_image->mt, intel_texobj->mt); - } - } - - /* The miptree is in a validated state, so no need to check later. */ - intel_texobj->needs_validate = false; - intel_texobj->validated_first_level = 0; - intel_texobj->validated_last_level = levels - 1; - intel_texobj->_Format = first_image->TexFormat; - - return true; -} - - -static void -brw_free_texture_image_buffer(struct gl_context * ctx, - struct gl_texture_image *texImage) -{ - struct brw_texture_image *brw_image = brw_texture_image(texImage); - - DBG("%s\n", __func__); - - brw_miptree_release(&brw_image->mt); - - _swrast_free_texture_image_buffer(ctx, texImage); -} - -/** - * Map texture memory/buffer into user space. - * Note: the region of interest parameters are ignored here. - * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT - * \param mapOut returns start of mapping of region of interest - * \param rowStrideOut returns row stride in bytes - */ -static void -brw_map_texture_image(struct gl_context *ctx, - struct gl_texture_image *tex_image, - GLuint slice, - GLuint x, GLuint y, GLuint w, GLuint h, - GLbitfield mode, - GLubyte **map, - GLint *out_stride) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - struct brw_mipmap_tree *mt = intel_image->mt; - ptrdiff_t stride; - - /* Our texture data is always stored in a miptree. */ - assert(mt); - - /* Check that our caller wasn't confused about how to map a 1D texture. */ - assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || h == 1); - - /* brw_miptree_map operates on a unified "slice" number that references the - * cube face, since it's all just slices to the miptree code. 
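 *
 * (A caller of this hook must honor the returned stride, which reflects
 * tiling and alignment, not width * cpp. Typical hypothetical usage --
 * 'src', 'src_pitch' and 'cpp' stand in for caller state:
 *
 *    GLubyte *map;
 *    GLint stride;
 *    ctx->Driver.MapTextureImage(ctx, tex_image, slice, x, y, w, h,
 *                                GL_MAP_WRITE_BIT, &map, &stride);
 *    for (GLuint row = 0; row < h; row++)       // walk rows via the stride
 *       memcpy(map + (ptrdiff_t)row * stride,
 *              src + (size_t)row * src_pitch, (size_t)w * cpp);
 *    ctx->Driver.UnmapTextureImage(ctx, tex_image, slice);
 * )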
- */ - if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - slice = tex_image->Face; - - brw_miptree_map(brw, mt, - tex_image->Level + tex_image->TexObject->Attrib.MinLevel, - slice + tex_image->TexObject->Attrib.MinLayer, - x, y, w, h, mode, - (void **)map, &stride); - - *out_stride = stride; -} - -static void -brw_unmap_texture_image(struct gl_context *ctx, - struct gl_texture_image *tex_image, GLuint slice) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - struct brw_mipmap_tree *mt = intel_image->mt; - - if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - slice = tex_image->Face; - - brw_miptree_unmap(brw, mt, - tex_image->Level + tex_image->TexObject->Attrib.MinLevel, - slice + tex_image->TexObject->Attrib.MinLayer); -} - -static GLboolean -brw_texture_view(struct gl_context *ctx, - struct gl_texture_object *texObj, - struct gl_texture_object *origTexObj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_object *intel_tex = brw_texture_object(texObj); - struct brw_texture_object *intel_orig_tex = brw_texture_object(origTexObj); - - assert(intel_orig_tex->mt); - brw_miptree_reference(&intel_tex->mt, intel_orig_tex->mt); - - /* Since we can only make views of immutable-format textures, - * we can assume that everything is in origTexObj's miptree. - * - * Mesa core has already made us a copy of all the teximage objects, - * except it hasn't copied our mt pointers, etc. - */ - const int numFaces = _mesa_num_tex_faces(texObj->Target); - const int numLevels = texObj->Attrib.NumLevels; - - int face; - int level; - - for (face = 0; face < numFaces; face++) { - for (level = 0; level < numLevels; level++) { - struct gl_texture_image *image = texObj->Image[face][level]; - struct brw_texture_image *intel_image = brw_texture_image(image); - - brw_miptree_reference(&intel_image->mt, intel_orig_tex->mt); - } - } - - /* The miptree is in a validated state, so no need to check later. */ - intel_tex->needs_validate = false; - intel_tex->validated_first_level = 0; - intel_tex->validated_last_level = numLevels - 1; - - /* Set the validated texture format, with the same adjustments that - * would have been applied to determine the underlying texture's - * mt->format. - */ - intel_tex->_Format = brw_depth_format_for_depthstencil_format( - brw_lower_compressed_format(brw, texObj->Image[0][0]->TexFormat)); - - return GL_TRUE; -} - -static void -brw_texture_barrier(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } else { - brw_emit_mi_flush(brw); - } -} - -/* Return the usual surface usage flags for the given format. 
*/ -static isl_surf_usage_flags_t -isl_surf_usage(mesa_format format) -{ - switch(_mesa_get_format_base_format(format)) { - case GL_DEPTH_COMPONENT: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - case GL_DEPTH_STENCIL: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT | - ISL_SURF_USAGE_TEXTURE_BIT; - case GL_STENCIL_INDEX: - return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - default: - return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - } -} - -static GLboolean -intel_texture_for_memory_object(struct gl_context *ctx, - struct gl_texture_object *tex_obj, - struct gl_memory_object *mem_obj, - GLsizei levels, GLsizei width, - GLsizei height, GLsizei depth, - GLuint64 offset) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_memory_object *intel_memobj = brw_memory_object(mem_obj); - struct brw_texture_object *intel_texobj = brw_texture_object(tex_obj); - struct gl_texture_image *image = tex_obj->Image[0][0]; - struct isl_surf surf; - - /* Only color formats are supported. */ - if (!_mesa_is_format_color_format(image->TexFormat)) - return GL_FALSE; - - isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK; - if (tex_obj->TextureTiling == GL_LINEAR_TILING_EXT) - tiling_flags = ISL_TILING_LINEAR_BIT; - - UNUSED const bool isl_surf_created_successfully = - isl_surf_init(&brw->screen->isl_dev, &surf, - .dim = get_isl_surf_dim(tex_obj->Target), - .format = brw_isl_format_for_mesa_format(image->TexFormat), - .width = width, - .height = height, - .depth = depth, - .levels = levels, - .array_len = tex_obj->Target == GL_TEXTURE_3D ? 1 : depth, - .samples = MAX2(image->NumSamples, 1), - .usage = isl_surf_usage(image->TexFormat), - .tiling_flags = tiling_flags); - - assert(isl_surf_created_successfully); - - intel_texobj->mt = brw_miptree_create_for_bo(brw, - intel_memobj->bo, - image->TexFormat, - offset, - width, - height, - depth, - surf.row_pitch_B, - surf.tiling, - MIPTREE_CREATE_NO_AUX); - assert(intel_texobj->mt); - brw_alloc_texture_image_buffer(ctx, image); - - intel_texobj->needs_validate = false; - intel_texobj->validated_first_level = 0; - intel_texobj->validated_last_level = levels - 1; - intel_texobj->_Format = image->TexFormat; - - return GL_TRUE; -} - -void -brw_init_texture_functions(struct dd_function_table *functions) -{ - functions->NewTextureObject = brw_new_texture_object; - functions->NewTextureImage = brw_new_texture_image; - functions->DeleteTextureImage = brw_delete_texture_image; - functions->DeleteTexture = brw_delete_texture_object; - functions->AllocTextureImageBuffer = brw_alloc_texture_image_buffer; - functions->FreeTextureImageBuffer = brw_free_texture_image_buffer; - functions->AllocTextureStorage = brw_alloc_texture_storage; - functions->MapTextureImage = brw_map_texture_image; - functions->UnmapTextureImage = brw_unmap_texture_image; - functions->TextureView = brw_texture_view; - functions->TextureBarrier = brw_texture_barrier; - functions->SetTextureStorageForMemoryObject = intel_texture_for_memory_object; -} diff --git a/src/mesa/drivers/dri/i965/brw_tex.h b/src/mesa/drivers/dri/i965/brw_tex.h deleted file mode 100644 index ee0837e..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef INTELTEX_INC -#define INTELTEX_INC - -#include "main/mtypes.h" -#include "main/formats.h" -#include "brw_context.h" -#include "brw_mipmap_tree.h" - -void brw_init_texture_functions(struct dd_function_table *functions); - -void brw_init_texture_image_functions(struct dd_function_table *functions); - -void brw_init_texture_copy_image_functions(struct dd_function_table *functs); - -void brw_init_copy_image_functions(struct dd_function_table *functions); - -void brw_set_texbuffer(__DRIcontext *pDRICtx, - GLint target, __DRIdrawable *pDraw); -void brw_set_texbuffer2(__DRIcontext *pDRICtx, - GLint target, GLint format, __DRIdrawable *pDraw); -void brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target, - __DRIdrawable *dPriv); - -struct brw_mipmap_tree * -brw_miptree_create_for_teximage(struct brw_context *brw, - struct brw_texture_object *brw_obj, - struct brw_texture_image *brw_image, - enum brw_miptree_create_flags flags); - -void brw_finalize_mipmap_tree(struct brw_context *brw, - struct gl_texture_object *tex_obj); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_tex_copy.c b/src/mesa/drivers/dri/i965/brw_tex_copy.c deleted file mode 100644 index 26c1fcd..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_copy.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/teximage.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/fbobject.h" - -#include "drivers/common/meta.h" - -#include "brw_screen.h" -#include "brw_mipmap_tree.h" -#include "brw_fbo.h" -#include "brw_tex.h" -#include "brw_context.h" - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - - -static void -brw_copytexsubimage(struct gl_context *ctx, GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint slice, - struct gl_renderbuffer *rb, - GLint x, GLint y, - GLsizei width, GLsizei height) -{ - struct brw_context *brw = brw_context(ctx); - - /* Try BLORP first. It can handle almost everything. */ - if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y, - xoffset, yoffset, width, height)) - return; - - /* Finally, fall back to meta. This will likely be slow. */ - perf_debug("%s - fallback to swrast\n", __func__); - _mesa_meta_CopyTexSubImage(ctx, dims, texImage, - xoffset, yoffset, slice, - rb, x, y, width, height); -} - - -void -brw_init_texture_copy_image_functions(struct dd_function_table *functions) -{ - functions->CopyTexSubImage = brw_copytexsubimage; -} diff --git a/src/mesa/drivers/dri/i965/brw_tex_image.c b/src/mesa/drivers/dri/i965/brw_tex_image.c deleted file mode 100644 index 7abe848..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_image.c +++ /dev/null @@ -1,992 +0,0 @@ - -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/enums.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/formats.h" -#include "main/glformats.h" -#include "main/image.h" -#include "main/pbo.h" -#include "main/renderbuffer.h" -#include "main/texcompress.h" -#include "main/texgetimage.h" -#include "main/texobj.h" -#include "main/teximage.h" -#include "main/texstore.h" -#include "main/glthread.h" - -#include "drivers/common/meta.h" - -#include "brw_mipmap_tree.h" -#include "brw_buffer_objects.h" -#include "brw_batch.h" -#include "brw_tex.h" -#include "brw_fbo.h" -#include "brw_image.h" -#include "brw_context.h" -#include "brw_blorp.h" - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -/* Make sure one doesn't end up shrinking base level zero unnecessarily. - * Determining the base level dimension by shifting a higher level's dimension - * ends up with an off-by-one value when the base level has an NPOT size - * (for example, 293 != 146 << 1). - * Choose the original base level dimension when the shifted dimensions agree. - * Otherwise assume a real resize is intended and use the new shifted value. - */ -static unsigned -get_base_dim(unsigned old_base_dim, unsigned new_level_dim, unsigned level) -{ - const unsigned old_level_dim = old_base_dim >> level; - const unsigned new_base_dim = new_level_dim << level; - - return old_level_dim == new_level_dim ? old_base_dim : new_base_dim; -} - -/* Work back from the specified level of the image to the base level and create a - * miptree of that size. 
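 *
 * (Worked example for get_base_dim() above: with an NPOT base width of 293,
 * level 1 is 293 >> 1 = 146, and naively reconstructing the base as
 * 146 << 1 = 292 would shrink it by one. Hence:
 *
 *    assert(get_base_dim(293, 146, 1) == 293);  // shifted dims agree: keep 293
 *    assert(get_base_dim(293, 150, 1) == 300);  // mismatch: treat as a real resize
 * )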
- */ -struct brw_mipmap_tree * -brw_miptree_create_for_teximage(struct brw_context *brw, - struct brw_texture_object *brw_obj, - struct brw_texture_image *brw_image, - enum brw_miptree_create_flags flags) -{ - GLuint lastLevel; - int width, height, depth; - unsigned old_width = 0, old_height = 0, old_depth = 0; - const struct brw_mipmap_tree *old_mt = brw_obj->mt; - const unsigned level = brw_image->base.Base.Level; - - brw_get_image_dims(&brw_image->base.Base, &width, &height, &depth); - - if (old_mt) { - old_width = old_mt->surf.logical_level0_px.width; - old_height = old_mt->surf.logical_level0_px.height; - old_depth = old_mt->surf.dim == ISL_SURF_DIM_3D ? - old_mt->surf.logical_level0_px.depth : - old_mt->surf.logical_level0_px.array_len; - } - - DBG("%s\n", __func__); - - /* Figure out image dimensions at start level. */ - switch(brw_obj->base.Target) { - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_EXTERNAL_OES: - assert(level == 0); - break; - case GL_TEXTURE_3D: - depth = old_mt ? get_base_dim(old_depth, depth, level) : - depth << level; - FALLTHROUGH; - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - height = old_mt ? get_base_dim(old_height, height, level) : - height << level; - FALLTHROUGH; - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - width = old_mt ? get_base_dim(old_width, width, level) : - width << level; - break; - default: - unreachable("Unexpected target"); - } - - /* Guess a reasonable value for lastLevel. This is probably going - * to be wrong fairly often and might mean that we have to look at - * resizable buffers, or require that buffers implement lazy - * pagetable arrangements. - */ - if ((brw_obj->base.Sampler.Attrib.MinFilter == GL_NEAREST || - brw_obj->base.Sampler.Attrib.MinFilter == GL_LINEAR) && - brw_image->base.Base.Level == 0 && - !brw_obj->base.Attrib.GenerateMipmap) { - lastLevel = 0; - } else { - lastLevel = _mesa_get_tex_max_num_levels(brw_obj->base.Target, - width, height, depth) - 1; - } - - return brw_miptree_create(brw, - brw_obj->base.Target, - brw_image->base.Base.TexFormat, - 0, - lastLevel, - width, - height, - depth, - MAX2(brw_image->base.Base.NumSamples, 1), - flags); -} - -static bool -brw_texsubimage_blorp(struct brw_context *brw, GLuint dims, - struct gl_texture_image *tex_image, - unsigned x, unsigned y, unsigned z, - unsigned width, unsigned height, unsigned depth, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel; - const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z; - - /* The blorp path can't understand crazy format hackery */ - if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) != - _mesa_get_format_base_format(tex_image->TexFormat)) - return false; - - return brw_blorp_upload_miptree(brw, intel_image->mt, tex_image->TexFormat, - mt_level, x, y, mt_z, width, height, depth, - tex_image->TexObject->Target, format, type, - pixels, packing); -} - -/** - * \brief A fast path for glTexImage and glTexSubImage. - * - * This fast path is taken when the texture format is BGRA, RGBA, - * A or L and when the texture memory is X- or Y-tiled. 
It uploads - * the texture data by mapping the texture memory without a GTT fence, thus - * acquiring a tiled view of the memory, and then copying successive - * spans within each tile. - * - * This is a performance win over the conventional texture upload path because - * it avoids the performance penalty of writing through the write-combine - * buffer. In the conventional texture upload path, - * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT - * fence, thus acquiring a linear view of the memory, then each row in the - * image is memcpy'd. In this fast path, we replace each row's copy with - * a sequence of copies over each linear span in the tile. - * - * One use case is Google Chrome's paint rectangles. Chrome (as - * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures. - * Each page's content is initially uploaded with glTexImage2D and damaged - * regions are updated with glTexSubImage2D. On some workloads, the - * performance gain of this fastpath on Sandybridge is over 5x. - */ -static bool -brw_texsubimage_tiled_memcpy(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_texture_image *image = brw_texture_image(texImage); - int src_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - isl_memcpy_type copy_type; - - /* This fastpath is restricted to specific texture types: - * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support - * more types. - * - * FINISHME: The restrictions below on packing alignment and packing row - * length are likely unneeded now because we calculate the source stride - * with _mesa_image_row_stride. However, before removing the restrictions - * we need tests. - */ - if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || - !(texImage->TexObject->Target == GL_TEXTURE_2D || - texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) || - pixels == NULL || - packing->BufferObj || - packing->Alignment > 4 || - packing->SkipPixels > 0 || - packing->SkipRows > 0 || - (packing->RowLength != 0 && packing->RowLength != width) || - packing->SwapBytes || - packing->LsbFirst || - packing->Invert) - return false; - - /* Only a simple blit, no scale, bias or other mapping. */ - if (ctx->_ImageTransferState) - return false; - - copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type, - &cpp); - if (copy_type == ISL_MEMCPY_INVALID) - return false; - - /* If this is a nontrivial texture view, let another path handle it instead. */ - if (texImage->TexObject->Attrib.MinLayer) - return false; - - if (!image->mt || - (image->mt->surf.tiling != ISL_TILING_X && - image->mt->surf.tiling != ISL_TILING_Y0)) { - /* The algorithm is written only for X- or Y-tiled memory. */ - return false; - } - - /* linear_to_tiled() assumes that if the object is swizzled, it is using - * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only - * true on gfx5 and above. - * - * The killer on top is that some gfx4 have an L-shaped swizzle mode, where - * parts of the memory aren't swizzled at all. Userspace just can't handle - * that. 
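 *
 * (For orientation, the address math behind the linear-to-tiled copy, shown
 * for X tiling only -- 512-byte-wide by 8-row tiles of 4 KiB -- and with
 * bit-6 swizzling ignored; a simplified sketch, not isl's implementation.
 * 'x_B' is the byte offset within a row:
 *
 *    static uint32_t
 *    xtile_offset(uint32_t x_B, uint32_t y, uint32_t row_pitch_B)
 *    {
 *       const uint32_t tile_w_B = 512, tile_h = 8;    // one X tile = 4 KiB
 *       uint32_t tiles_per_row = row_pitch_B / tile_w_B;
 *       uint32_t tile = (y / tile_h) * tiles_per_row + x_B / tile_w_B;
 *       return tile * 4096 + (y % tile_h) * tile_w_B + x_B % tile_w_B;
 *    }
 *
 * Copying whole 512-byte spans inside each tile keeps the writes sequential
 * in memory, which write-combining handles well.)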
- */ - if (devinfo->ver < 5 && devinfo->has_bit6_swizzle) - return false; - - int level = texImage->Level + texImage->TexObject->Attrib.MinLevel; - - /* Since we are going to write raw data to the miptree, we need to resolve - * any pending fast color clears before we start. - */ - assert(image->mt->surf.logical_level0_px.depth == 1); - assert(image->mt->surf.logical_level0_px.array_len == 1); - - brw_miptree_access_raw(brw, image->mt, level, 0, true); - - bo = image->mt->bo; - - if (brw_batch_references(&brw->batch, bo)) { - perf_debug("Flushing before mapping a referenced bo.\n"); - brw_batch_flush(brw); - } - - void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW); - if (map == NULL) { - DBG("%s: failed to map bo\n", __func__); - return false; - } - - src_pitch = _mesa_image_row_stride(packing, width, format, type); - - /* We postponed printing this message until having committed to executing - * the function. - */ - DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " - "mesa_format=0x%x tiling=%d " - "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) ", - __func__, texImage->Level, xoffset, yoffset, width, height, - format, type, texImage->TexFormat, image->mt->surf.tiling, - packing->Alignment, packing->RowLength, packing->SkipPixels, - packing->SkipRows); - - /* Adjust x and y offset based on miplevel */ - unsigned level_x, level_y; - brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y); - xoffset += level_x; - yoffset += level_y; - - isl_memcpy_linear_to_tiled( - xoffset * cpp, (xoffset + width) * cpp, - yoffset, yoffset + height, - map, - pixels, - image->mt->surf.row_pitch_B, src_pitch, - devinfo->has_bit6_swizzle, - image->mt->surf.tiling, - copy_type - ); - - brw_bo_unmap(bo); - return true; -} - - -static void -brw_upload_tex(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_mipmap_tree *mt = brw_texture_image(texImage)->mt; - bool ok; - - /* Check that there is actually data to store. */ - if (pixels == NULL && !packing->BufferObj) - return; - - bool tex_busy = mt && - (brw_batch_references(&brw->batch, mt->bo) || brw_bo_busy(mt->bo)); - - if (packing->BufferObj || tex_busy || - mt->aux_usage == ISL_AUX_USAGE_CCS_E) { - ok = brw_texsubimage_blorp(brw, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, format, type, - pixels, packing); - if (ok) - return; - } - - ok = brw_texsubimage_tiled_memcpy(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing); - if (ok) - return; - - _mesa_store_texsubimage(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing); -} - - -static void -brw_teximage(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *unpack) -{ - DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", - __func__, _mesa_get_format_name(texImage->TexFormat), - _mesa_enum_to_string(texImage->TexObject->Target), - _mesa_enum_to_string(format), _mesa_enum_to_string(type), - texImage->Level, texImage->Width, texImage->Height, texImage->Depth); - - /* Allocate storage for texture data. 
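 *
 * (The path selection in brw_upload_tex() above reduces to one predicate:
 * take the GPU (BLORP) route whenever a CPU map would stall or cannot be
 * done cheaply. An illustrative restatement, not the driver's own helper:
 *
 *    static bool
 *    prefer_gpu_upload(bool src_is_pbo, bool bo_busy, bool ccs_compressed)
 *    {
 *       // A busy BO would stall the map until the GPU catches up; a PBO
 *       // source already lives in a BO; CCS_E surfaces would need a
 *       // resolve before raw CPU writes.
 *       return src_is_pbo || bo_busy || ccs_compressed;
 *    }
 *
 * If BLORP declines, the code still falls through to the tiled-memcpy and
 * generic store paths.)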
*/ - if (!ctx->Driver.AllocTextureImageBuffer(ctx, texImage)) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims); - return; - } - - assert(brw_texture_image(texImage)->mt); - - brw_upload_tex(ctx, dims, texImage, 0, 0, 0, - texImage->Width, texImage->Height, texImage->Depth, - format, type, pixels, unpack); -} - - -static void -brw_texsubimage(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing) -{ - DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", - __func__, _mesa_get_format_name(texImage->TexFormat), - _mesa_enum_to_string(texImage->TexObject->Target), - _mesa_enum_to_string(format), _mesa_enum_to_string(type), - texImage->Level, texImage->Width, texImage->Height, texImage->Depth); - - brw_upload_tex(ctx, dims, texImage, xoffset, yoffset, zoffset, - width, height, depth, format, type, pixels, packing); -} - - -static void -brw_set_texture_image_mt(struct brw_context *brw, - struct gl_texture_image *image, - GLenum internal_format, - mesa_format format, - struct brw_mipmap_tree *mt) - -{ - struct gl_texture_object *texobj = image->TexObject; - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - struct brw_texture_image *intel_image = brw_texture_image(image); - - _mesa_init_teximage_fields(&brw->ctx, image, - mt->surf.logical_level0_px.width, - mt->surf.logical_level0_px.height, 1, - 0, internal_format, format); - - brw->ctx.Driver.FreeTextureImageBuffer(&brw->ctx, image); - - intel_texobj->needs_validate = true; - intel_image->base.RowStride = mt->surf.row_pitch_B / mt->cpp; - assert(mt->surf.row_pitch_B % mt->cpp == 0); - - brw_miptree_reference(&intel_image->mt, mt); - - /* Immediately validate the image to the object. */ - brw_miptree_reference(&intel_texobj->mt, mt); -} - - -void -brw_set_texbuffer2(__DRIcontext *pDRICtx, GLint target, - GLint texture_format, - __DRIdrawable *dPriv) -{ - struct gl_framebuffer *fb = dPriv->driverPrivate; - struct brw_context *brw = pDRICtx->driverPrivate; - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *rb; - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - mesa_format texFormat = MESA_FORMAT_NONE; - GLenum internal_format = 0; - - _mesa_glthread_finish(ctx); - - texObj = _mesa_get_current_tex_object(ctx, target); - - if (!texObj) - return; - - if (dPriv->lastStamp != dPriv->dri2.stamp || - !pDRICtx->driScreenPriv->dri2.useInvalidate) - brw_update_renderbuffers(pDRICtx, dPriv); - - rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - /* If the miptree isn't set, then brw_update_renderbuffers was unable - * to get the BO for the drawable from the window system. - */ - if (!rb || !rb->mt) - return; - - /* Neither the EGL nor the GLX texture_from_pixmap spec says anything about - * sRGB. They are both from a time when sRGB was considered an extra - * encoding step you did as part of rendering/blending and not a format. - * Even though we have a concept of sRGB visuals, X has classically assumed - * that your data is just bits and sRGB rendering is entirely a client-side - * rendering construct. The assumption is that the result of BindTexImage - * is a texture with a linear format even if it was rendered with sRGB - * encoding enabled. 
- */ - texFormat = _mesa_get_srgb_format_linear(brw_rb_format(rb)); - - if (rb->mt->cpp == 4) { - /* The extra texture_format parameter indicates whether the alpha - * channel should be respected or ignored. If we set internal_format to - * GL_RGB, the texture handling code is smart enough to swap the format - * or apply a swizzle if the underlying format is RGBA so we don't need - * to stomp it to RGBX or anything like that. - */ - if (texture_format == __DRI_TEXTURE_FORMAT_RGB) - internal_format = GL_RGB; - else - internal_format = GL_RGBA; - } else if (rb->mt->cpp == 2) { - internal_format = GL_RGB; - } - - brw_miptree_finish_external(brw, rb->mt); - - _mesa_lock_texture(&brw->ctx, texObj); - texImage = _mesa_get_tex_image(ctx, texObj, target, 0); - brw_set_texture_image_mt(brw, texImage, internal_format, - texFormat, rb->mt); - _mesa_unlock_texture(&brw->ctx, texObj); -} - -void -brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target, - __DRIdrawable *dPriv) -{ - struct brw_context *brw = pDRICtx->driverPrivate; - struct gl_context *ctx = &brw->ctx; - struct gl_texture_object *tex_obj; - struct brw_texture_object *intel_tex; - - tex_obj = _mesa_get_current_tex_object(ctx, target); - if (!tex_obj) - return; - - _mesa_lock_texture(&brw->ctx, tex_obj); - - intel_tex = brw_texture_object(tex_obj); - if (!intel_tex->mt) { - _mesa_unlock_texture(&brw->ctx, tex_obj); - return; - } - - /* The brw_miptree_prepare_external below as well as the finish_external - * above in brw_set_texbuffer2 *should* do nothing. The BindTexImage call - * from both GLX and EGL has TexImage2D and not TexSubImage2D semantics so - * the texture is not immutable. This means that the user cannot create a - * texture view of the image with a different format. Since the only three - * formats available when using BindTexImage are all UNORM, we can never - * end up with an sRGB format being used for texturing and so we shouldn't - * get any format-related resolves when texturing from it. - * - * While very unlikely, it is possible that the client could use the bound - * texture with GL_ARB_image_load_store. In that case, we'll do a resolve - * but that's not actually a problem as it just means that we lose - * compression on this texture until the next time it's used as a render - * target. - * - * The only other way we could end up with an unexpected aux usage would be - * if we rendered to the image from the same context as we have it bound as - * a texture between BindTexImage and ReleaseTexImage. However, the spec - * clearly calls this case out and says you shouldn't do that. It doesn't - * explicitly prevent binding the texture to a framebuffer but it says the - * results of trying to render to it while bound are undefined. - * - * Just to keep everything safe and sane, we do a prepare_external but it - * should be a no-op in almost all cases. On the off chance that someone - * ever triggers this, we should at least warn them. - */ - if (intel_tex->mt->aux_buf && - brw_miptree_get_aux_state(intel_tex->mt, 0, 0) != - isl_drm_modifier_get_default_aux_state(intel_tex->mt->drm_modifier)) { - _mesa_warning(ctx, "Aux state changed between BindTexImage and " - "ReleaseTexImage. 
Most likely someone tried to draw " - "to the pixmap bound in BindTexImage or used it with " - "image_load_store."); - } - - brw_miptree_prepare_external(brw, intel_tex->mt); - - _mesa_unlock_texture(&brw->ctx, tex_obj); -} - -static GLboolean -brw_bind_renderbuffer_tex_image(struct gl_context *ctx, - struct gl_renderbuffer *rb, - struct gl_texture_image *image) -{ - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_texture_image *intel_image = brw_texture_image(image); - struct gl_texture_object *texobj = image->TexObject; - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - - /* We can only handle RB allocated with AllocRenderbufferStorage, or - * window-system renderbuffers. - */ - assert(!rb->TexImage); - - if (!irb->mt) - return false; - - _mesa_lock_texture(ctx, texobj); - _mesa_init_teximage_fields(ctx, image, rb->Width, rb->Height, 1, 0, - rb->InternalFormat, rb->Format); - image->NumSamples = rb->NumSamples; - - brw_miptree_reference(&intel_image->mt, irb->mt); - - /* Immediately validate the image to the object. */ - brw_miptree_reference(&intel_texobj->mt, intel_image->mt); - - intel_texobj->needs_validate = true; - _mesa_unlock_texture(ctx, texobj); - - return true; -} - -void -brw_set_texbuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) -{ - /* The old interface didn't have the format argument, so copy our - * implementation's behavior at the time. - */ - brw_set_texbuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); -} - -static void -brw_image_target_texture(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLeglImageOES image_handle, - bool storage) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_mipmap_tree *mt; - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - __DRIimage *image; - - image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle, - dri_screen->loaderPrivate); - if (image == NULL) - return; - - /* Disallow depth/stencil textures: we don't have a way to pass the - * separate stencil miptree of a GL_DEPTH_STENCIL texture through. - */ - if (image->has_depthstencil) { - _mesa_error(ctx, GL_INVALID_OPERATION, __func__); - return; - } - - mt = brw_miptree_create_for_dri_image(brw, image, target, image->format, - false); - if (mt == NULL) - return; - - struct brw_texture_object *intel_texobj = brw_texture_object(texObj); - intel_texobj->planar_format = image->planar_format; - intel_texobj->yuv_color_space = image->yuv_color_space; - - GLenum internal_format = - image->internal_format != 0 ? - image->internal_format : _mesa_get_format_base_format(mt->format); - - /* Fix the internal format when _mesa_get_format_base_format(mt->format) - * isn't a valid one for that particular format. - */ - if (brw->mesa_format_supports_render[image->format]) { - if (image->format == MESA_FORMAT_R10G10B10A2_UNORM || - image->format == MESA_FORMAT_R10G10B10X2_UNORM || - image->format == MESA_FORMAT_B10G10R10A2_UNORM || - image->format == MESA_FORMAT_B10G10R10X2_UNORM) - internal_format = GL_RGB10_A2; - } - - /* Guess sized internal format for dma-bufs, as specified by - * EXT_EGL_image_storage. 
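For context, the lookupEGLImage() hook used above is the driver half of the client-visible EGLImage path. A client binds an EGLImage to a texture roughly like this (GLES2/EGL sketch; error handling omitted, and `dpy`/`img` are assumed to have been created elsewhere):

#define GL_GLEXT_PROTOTYPES /* portable code would use eglGetProcAddress */
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>

/* Bind an existing EGLImage as the storage of a GLES texture. */
static GLuint
texture_from_eglimage(EGLDisplay dpy, EGLImageKHR img)
{
   (void)dpy;
   GLuint tex;
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   /* This call lands in brw_image_target_texture_2d() in the driver. */
   glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, (GLeglImageOES)img);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   return tex;
}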
- */ - if (storage && target == GL_TEXTURE_2D && image->imported_dmabuf) { - internal_format = driGLFormatToSizedInternalGLFormat(image->format); - if (internal_format == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, __func__); - return; - } - } - - brw_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt); - brw_miptree_release(&mt); -} - -static void -brw_image_target_texture_2d(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLeglImageOES image_handle) -{ - brw_image_target_texture(ctx, target, texObj, texImage, image_handle, - false); -} - -static void -brw_image_target_tex_storage(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLeglImageOES image_handle) -{ - struct brw_texture_object *intel_texobj = brw_texture_object(texObj); - brw_image_target_texture(ctx, target, texObj, texImage, image_handle, - true); - - /* The miptree is in a validated state, so no need to check later. */ - intel_texobj->needs_validate = false; - intel_texobj->validated_first_level = 0; - intel_texobj->validated_last_level = 0; - intel_texobj->_Format = texImage->TexFormat; -} - -static bool -brw_gettexsubimage_blorp(struct brw_context *brw, - struct gl_texture_image *tex_image, - unsigned x, unsigned y, unsigned z, - unsigned width, unsigned height, unsigned depth, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel; - const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z; - - /* The blorp path can't understand crazy format hackery */ - if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) != - _mesa_get_format_base_format(tex_image->TexFormat)) - return false; - - return brw_blorp_download_miptree(brw, intel_image->mt, - tex_image->TexFormat, SWIZZLE_XYZW, - mt_level, x, y, mt_z, - width, height, depth, - tex_image->TexObject->Target, - format, type, false, pixels, packing); -} - -/** - * \brief A fast path for glGetTexImage. - * - * \see brw_readpixels_tiled_memcpy() - */ -static bool -brw_gettexsubimage_tiled_memcpy(struct gl_context *ctx, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - GLvoid *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_texture_image *image = brw_texture_image(texImage); - int dst_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - isl_memcpy_type copy_type; - - /* This fastpath is restricted to specific texture types: - * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support - * more types. - * - * FINISHME: The restrictions below on packing alignment and packing row - * length are likely unneeded now because we calculate the destination stride - * with _mesa_image_row_stride. However, before removing the restrictions - * we need tests. 
*/
-   if (!devinfo->has_llc ||
-       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-       !(texImage->TexObject->Target == GL_TEXTURE_2D ||
-         texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
-       pixels == NULL ||
-       packing->BufferObj ||
-       packing->Alignment > 4 ||
-       packing->SkipPixels > 0 ||
-       packing->SkipRows > 0 ||
-       (packing->RowLength != 0 && packing->RowLength != width) ||
-       packing->SwapBytes ||
-       packing->LsbFirst ||
-       packing->Invert)
-      return false;
-
-   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
-    * function doesn't set the last channel to 1. Note this checks BaseFormat
-    * rather than TexFormat in case the RGBX format is being simulated with
-    * an RGBA format.
-    */
-   if (texImage->_BaseFormat == GL_RGB)
-      return false;
-
-   copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
-                                           &cpp);
-   if (copy_type == ISL_MEMCPY_INVALID)
-      return false;
-
-   /* If this is a nontrivial texture view, let another path handle it instead. */
-   if (texImage->TexObject->Attrib.MinLayer)
-      return false;
-
-   if (!image->mt ||
-       (image->mt->surf.tiling != ISL_TILING_X &&
-        image->mt->surf.tiling != ISL_TILING_Y0)) {
-      /* The algorithm is written only for X- or Y-tiled memory. */
-      return false;
-   }
-
-   /* isl_memcpy_tiled_to_linear() assumes that if the object is swizzled,
-    * it is using I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.
-    * This is only true on gfx5 and above.
-    *
-    * The killer on top is that some gfx4 parts have an L-shaped swizzle
-    * mode, where parts of the memory aren't swizzled at all. Userspace just
-    * can't handle that.
-    */
-   if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
-      return false;
-
-   int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
-
-   /* Since we are going to read raw data from the miptree, we need to
-    * resolve any pending fast color clears before we start.
- */ - assert(image->mt->surf.logical_level0_px.depth == 1); - assert(image->mt->surf.logical_level0_px.array_len == 1); - - brw_miptree_access_raw(brw, image->mt, level, 0, true); - - bo = image->mt->bo; - - if (brw_batch_references(&brw->batch, bo)) { - perf_debug("Flushing before mapping a referenced bo.\n"); - brw_batch_flush(brw); - } - - void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW); - if (map == NULL) { - DBG("%s: failed to map bo\n", __func__); - return false; - } - - dst_pitch = _mesa_image_row_stride(packing, width, format, type); - - DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " - "mesa_format=0x%x tiling=%d " - "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", - __func__, texImage->Level, xoffset, yoffset, width, height, - format, type, texImage->TexFormat, image->mt->surf.tiling, - packing->Alignment, packing->RowLength, packing->SkipPixels, - packing->SkipRows); - - /* Adjust x and y offset based on miplevel */ - unsigned level_x, level_y; - brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y); - xoffset += level_x; - yoffset += level_y; - - isl_memcpy_tiled_to_linear( - xoffset * cpp, (xoffset + width) * cpp, - yoffset, yoffset + height, - pixels, - map, - dst_pitch, image->mt->surf.row_pitch_B, - devinfo->has_bit6_swizzle, - image->mt->surf.tiling, - copy_type - ); - - brw_bo_unmap(bo); - return true; -} - -static void -brw_get_tex_sub_image(struct gl_context *ctx, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLint depth, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage) -{ - struct brw_context *brw = brw_context(ctx); - bool ok; - - DBG("%s\n", __func__); - - if (ctx->Pack.BufferObj) { - if (brw_gettexsubimage_blorp(brw, texImage, - xoffset, yoffset, zoffset, - width, height, depth, format, type, - pixels, &ctx->Pack)) - return; - - perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); - } - - ok = brw_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset, - width, height, - format, type, pixels, &ctx->Pack); - - if(ok) - return; - - _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, texImage); - - DBG("%s - DONE\n", __func__); -} - -static void -flush_astc_denorms(struct gl_context *ctx, GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth) -{ - struct compressed_pixelstore store; - _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, - width, height, depth, - &ctx->Unpack, &store); - - for (int slice = 0; slice < store.CopySlices; slice++) { - - /* Map dest texture buffer */ - GLubyte *dstMap; - GLint dstRowStride; - ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset, - xoffset, yoffset, width, height, - GL_MAP_READ_BIT | GL_MAP_WRITE_BIT, - &dstMap, &dstRowStride); - if (!dstMap) - continue; - - for (int i = 0; i < store.CopyRowsPerSlice; i++) { - - /* An ASTC block is stored in little endian mode. The byte that - * contains bits 0..7 is stored at the lower address in memory. 
- */ - struct astc_void_extent { - uint16_t header : 12; - uint16_t dontcare[3]; - uint16_t R; - uint16_t G; - uint16_t B; - uint16_t A; - } *blocks = (struct astc_void_extent*) dstMap; - - /* Iterate over every copied block in the row */ - for (int j = 0; j < store.CopyBytesPerRow / 16; j++) { - - /* Check if the header matches that of an LDR void-extent block */ - if (blocks[j].header == 0xDFC) { - - /* Flush UNORM16 values that would be denormalized */ - if (blocks[j].A < 4) blocks[j].A = 0; - if (blocks[j].B < 4) blocks[j].B = 0; - if (blocks[j].G < 4) blocks[j].G = 0; - if (blocks[j].R < 4) blocks[j].R = 0; - } - } - - dstMap += dstRowStride; - } - - ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset); - } -} - - -static void -brw_compressedtexsubimage(struct gl_context *ctx, GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, - GLsizei imageSize, const GLvoid *data) -{ - /* Upload the compressed data blocks */ - _mesa_store_compressed_texsubimage(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, - format, imageSize, data); - - /* Fix up copied ASTC blocks if necessary */ - GLenum gl_format = _mesa_compressed_format_to_glenum(ctx, - texImage->TexFormat); - bool is_linear_astc = _mesa_is_astc_format(gl_format) && - !_mesa_is_srgb_format(gl_format); - struct brw_context *brw = (struct brw_context*) ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - if (devinfo->ver == 9 && - !intel_device_info_is_9lp(devinfo) && - is_linear_astc) - flush_astc_denorms(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth); -} - -void -brw_init_texture_image_functions(struct dd_function_table *functions) -{ - functions->TexImage = brw_teximage; - functions->TexSubImage = brw_texsubimage; - functions->CompressedTexSubImage = brw_compressedtexsubimage; - functions->EGLImageTargetTexture2D = brw_image_target_texture_2d; - functions->EGLImageTargetTexStorage = brw_image_target_tex_storage; - functions->BindRenderbufferTexImage = brw_bind_renderbuffer_tex_image; - functions->GetTexSubImage = brw_get_tex_sub_image; -} diff --git a/src/mesa/drivers/dri/i965/brw_tex_obj.h b/src/mesa/drivers/dri/i965/brw_tex_obj.h deleted file mode 100644 index 7946851..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_obj.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
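flush_astc_denorms() above patches LDR void-extent blocks in place through a bitfield overlay. The same check can be written against a raw 16-byte block, which makes the word layout easier to see; this rewrite is for illustration only and assumes the little-endian layout described in the comment above:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Flush would-be-denormal UNORM16 channels of one 16-byte ASTC block
 * if it is an LDR void-extent block (12-bit header == 0xDFC). */
static bool
flush_void_extent_denorms(uint8_t block[16])
{
   uint16_t words[8];
   memcpy(words, block, 16);          /* assumes a little-endian host */

   if ((words[0] & 0x0FFF) != 0xDFC)  /* not a void-extent block */
      return false;

   for (int ch = 4; ch < 8; ch++) {   /* words 4..7 hold R, G, B, A */
      if (words[ch] < 4)
         words[ch] = 0;
   }
   memcpy(block, words, 16);
   return true;
}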
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _BRW_TEX_OBJ_H -#define _BRW_TEX_OBJ_H - -#include "swrast/s_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_texture_object -{ - struct gl_texture_object base; - - /* This is a mirror of base._MaxLevel, updated at validate time, - * except that we don't bother with the non-base levels for - * non-mipmapped textures. - */ - unsigned int _MaxLevel; - - unsigned int validated_first_level; - unsigned int validated_last_level; - - /* The miptree of pixel data for the texture (if !needs_validate). After - * validation, the images will also have references to the same mt. - */ - struct brw_mipmap_tree *mt; - - /** - * Set when mipmap trees in the texture images of this texture object - * might not all be the mipmap tree above. - */ - bool needs_validate; - - /* Mesa format for the validated texture object. For non-views this - * will always be the same as texObj->Image[0][0].TexFormat. For views, it - * may differ since the mt is shared across views with differing formats. - */ - mesa_format _Format; - - const struct brw_image_format *planar_format; - unsigned int yuv_color_space; -}; - - -/** - * brw_texture_image is a subclass of swrast_texture_image because we - * sometimes fall back to using the swrast module for software rendering. - */ -struct brw_texture_image -{ - struct swrast_texture_image base; - - /* If brw_image->mt != NULL, image data is stored here. - * Else if brw_image->base.Buffer != NULL, image is stored there. - * Else there is no image data. - */ - struct brw_mipmap_tree *mt; -}; - -static inline struct brw_texture_object * -brw_texture_object(struct gl_texture_object *obj) -{ - return (struct brw_texture_object *) obj; -} - -static inline struct brw_texture_image * -brw_texture_image(struct gl_texture_image *img) -{ - return (struct brw_texture_image *) img; -} - -#ifdef __cplusplus -} -#endif - -#endif /* _BRW_TEX_OBJ_H */ diff --git a/src/mesa/drivers/dri/i965/brw_tex_validate.c b/src/mesa/drivers/dri/i965/brw_tex_validate.c deleted file mode 100644 index 36803cc..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_validate.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/samplerobj.h" -#include "main/teximage.h" -#include "main/texobj.h" - -#include "brw_context.h" -#include "brw_mipmap_tree.h" -#include "brw_tex.h" - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -/** - * Sets our driver-specific variant of tObj->_MaxLevel for later surface state - * upload. - * - * If we're only ensuring that there is storage for the first miplevel of a - * texture, then in texture setup we're going to have to make sure we don't - * allow sampling beyond level 0. - */ -static void -brw_update_max_level(struct gl_texture_object *tObj, - struct gl_sampler_object *sampler) -{ - struct brw_texture_object *brw_obj = brw_texture_object(tObj); - - if (!tObj->_MipmapComplete || - (tObj->_RenderToTexture && - (sampler->Attrib.MinFilter == GL_NEAREST || - sampler->Attrib.MinFilter == GL_LINEAR))) { - brw_obj->_MaxLevel = tObj->Attrib.BaseLevel; - } else { - brw_obj->_MaxLevel = tObj->_MaxLevel; - } -} - -/** - * At rendering-from-a-texture time, make sure that the texture object has a - * miptree that can hold the entire texture based on - * BaseLevel/MaxLevel/filtering, and copy in any texture images that are - * stored in other miptrees. - */ -void -brw_finalize_mipmap_tree(struct brw_context *brw, - struct gl_texture_object *tObj) -{ - struct brw_texture_object *brw_obj = brw_texture_object(tObj); - GLuint face, i; - GLuint nr_faces = 0; - struct brw_texture_image *firstImage; - int width, height, depth; - - /* TBOs require no validation -- they always just point to their BO. */ - if (tObj->Target == GL_TEXTURE_BUFFER) - return; - - /* What levels does this validated texture image require? */ - int validate_first_level = tObj->Attrib.BaseLevel; - int validate_last_level = brw_obj->_MaxLevel; - - /* Skip the loop over images in the common case of no images having - * changed. But if the GL_BASE_LEVEL or GL_MAX_LEVEL change to something we - * haven't looked at, then we do need to look at those new images. - */ - if (!brw_obj->needs_validate && - validate_first_level >= brw_obj->validated_first_level && - validate_last_level <= brw_obj->validated_last_level) { - return; - } - - /* On recent generations, immutable textures should not get this far - * -- they should have been created in a validated state, and nothing - * can invalidate them. - * - * Unfortunately, this is not true on pre-Sandybridge hardware -- when - * rendering into an immutable-format depth texture we may have to rebase - * the rendered levels to meet alignment requirements. - * - * FINISHME: Avoid doing this. - */ - assert(!tObj->Immutable || brw->screen->devinfo.ver < 6); - - firstImage = brw_texture_image(tObj->Image[0][tObj->Attrib.BaseLevel]); - if (!firstImage) - return; - - /* Check tree can hold all active levels. Check tree matches - * target, imageFormat, etc. 
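brw_update_max_level() above collapses the sampled range to the base level whenever the mipmap chain is incomplete or the min filter can never read past level 0. The rule, restated as a standalone predicate (the boolean inputs stand in for the texture and sampler state):

#include <assert.h>
#include <stdbool.h>

/* True when sampling can only ever touch the base level, mirroring the
 * condition in brw_update_max_level(). */
static bool
base_level_only(bool mipmap_complete, bool render_to_texture,
                bool min_filter_is_unmipped)
{
   return !mipmap_complete ||
          (render_to_texture && min_filter_is_unmipped);
}

int main(void)
{
   /* GL_NEAREST/GL_LINEAR min filters never read other levels. */
   assert(base_level_only(true, true, true));
   assert(!base_level_only(true, false, true));
   return 0;
}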
- */ - if (brw_obj->mt && - (!brw_miptree_match_image(brw_obj->mt, &firstImage->base.Base) || - validate_first_level < brw_obj->mt->first_level || - validate_last_level > brw_obj->mt->last_level)) { - brw_miptree_release(&brw_obj->mt); - } - - - /* May need to create a new tree: - */ - if (!brw_obj->mt) { - const unsigned level = firstImage->base.Base.Level; - brw_get_image_dims(&firstImage->base.Base, &width, &height, &depth); - /* Figure out image dimensions at start level. */ - switch(brw_obj->base.Target) { - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_EXTERNAL_OES: - assert(level == 0); - break; - case GL_TEXTURE_3D: - depth = depth << level; - FALLTHROUGH; - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - height = height << level; - FALLTHROUGH; - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - width = width << level; - break; - default: - unreachable("Unexpected target"); - } - perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle " - "finalized texture miptree.\n", - _mesa_get_format_name(firstImage->base.Base.TexFormat), - width, height, depth, validate_last_level + 1); - - brw_obj->mt = brw_miptree_create(brw, - brw_obj->base.Target, - firstImage->base.Base.TexFormat, - 0, /* first_level */ - validate_last_level, - width, - height, - depth, - 1 /* num_samples */, - MIPTREE_CREATE_BUSY); - if (!brw_obj->mt) - return; - } - - /* Pull in any images not in the object's tree: - */ - nr_faces = _mesa_num_tex_faces(brw_obj->base.Target); - for (face = 0; face < nr_faces; face++) { - for (i = validate_first_level; i <= validate_last_level; i++) { - struct brw_texture_image *brw_image = - brw_texture_image(brw_obj->base.Image[face][i]); - /* skip too small size mipmap */ - if (brw_image == NULL) - break; - - if (brw_obj->mt != brw_image->mt) - brw_miptree_copy_teximage(brw, brw_image, brw_obj->mt); - - /* After we're done, we'd better agree that our layout is - * appropriate, or we'll end up hitting this function again on the - * next draw - */ - assert(brw_miptree_match_image(brw_obj->mt, &brw_image->base.Base)); - } - } - - brw_obj->validated_first_level = validate_first_level; - brw_obj->validated_last_level = validate_last_level; - brw_obj->_Format = firstImage->base.Base.TexFormat, - brw_obj->needs_validate = false; -} - -/** - * Finalizes all textures, completing any rendering that needs to be done - * to prepare them. - */ -void -brw_validate_textures(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit; - - for (int unit = 0; unit <= max_enabled_unit; unit++) { - struct gl_texture_object *tex_obj = ctx->Texture.Unit[unit]._Current; - - if (!tex_obj) - continue; - - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - /* We know that this is true by now, and if it wasn't, we might have - * mismatched level sizes and the copies would fail. - */ - assert(tex_obj->_BaseComplete); - - brw_update_max_level(tex_obj, sampler); - brw_finalize_mipmap_tree(brw, tex_obj); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_upload.c b/src/mesa/drivers/dri/i965/brw_upload.c deleted file mode 100644 index 8f7acf0..0000000 --- a/src/mesa/drivers/dri/i965/brw_upload.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. 
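The switch above recovers the level-0 dimensions from whichever image happens to sit at BaseLevel by shifting the stored sizes back up by the level index. A worked example of that back-computation for a GL_TEXTURE_2D (the numbers are made up):

#include <assert.h>

int main(void)
{
   /* Suppose BaseLevel == 2 and that image is 16x8 texels. */
   unsigned level = 2, width = 16, height = 8;

   /* GL_TEXTURE_2D case: both dimensions scale with the level. */
   height <<= level;   /* 8  -> 32 */
   width  <<= level;   /* 16 -> 64 */

   assert(width == 64 && height == 32);
   return 0;
}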
- * Copyright © 2007 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file intel_upload.c - * - * Batched upload via BOs. - */ - -#include "main/macros.h" -#include "brw_bufmgr.h" -#include "brw_context.h" -#include "brw_buffer_objects.h" - -void -brw_upload_finish(struct brw_uploader *upload) -{ - assert((upload->bo == NULL) == (upload->map == NULL)); - if (!upload->bo) - return; - - brw_bo_unmap(upload->bo); - brw_bo_unreference(upload->bo); - upload->bo = NULL; - upload->map = NULL; - upload->next_offset = 0; -} - -/** - * Interface for getting memory for uploading streamed data to the GPU - * - * In most cases, streamed data (for GPU state structures, for example) is - * uploaded through brw_state_batch(), since that interface allows relocations - * from the streamed space returned to other BOs. However, that interface has - * the restriction that the amount of space allocated has to be "small". - * - * This interface, on the other hand, is able to handle arbitrary sized - * allocation requests, though it will batch small allocations into the same - * BO for efficiency and reduced memory footprint. - * - * \note The returned pointer is valid only until brw_upload_finish(). - * - * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on - * entry, and will have a reference to the new BO containing the state on - * return. - * - * \param out_offset Offset within the buffer object that the data will land. - */ -void * -brw_upload_space(struct brw_uploader *upload, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset) -{ - uint32_t offset; - - offset = ALIGN_NPOT(upload->next_offset, alignment); - if (upload->bo && offset + size > upload->bo->size) { - brw_upload_finish(upload); - offset = 0; - } - - assert((upload->bo == NULL) == (upload->map == NULL)); - if (!upload->bo) { - upload->bo = brw_bo_alloc(upload->bufmgr, "streamed data", - MAX2(upload->default_size, size), - BRW_MEMZONE_OTHER); - upload->map = brw_bo_map(NULL, upload->bo, - MAP_READ | MAP_WRITE | - MAP_PERSISTENT | MAP_ASYNC); - } - - upload->next_offset = offset + size; - - *out_offset = offset; - if (*out_bo != upload->bo) { - brw_bo_unreference(*out_bo); - *out_bo = upload->bo; - brw_bo_reference(upload->bo); - } - - return upload->map + offset; -} - -/** - * Handy interface to upload some data to temporary GPU memory quickly. 
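The uploader deleted in this file sub-allocates aligned chunks out of one BO and replaces the BO when it fills up. A toy CPU-only mirror of that batching logic, runnable without any driver types (buffer sizes and the 4096-byte default are illustrative, standing in for upload->default_size):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct toy_uploader {
   uint8_t *buf;
   uint32_t size, next;
};

static void *
toy_upload_space(struct toy_uploader *u, uint32_t sz, uint32_t align,
                 uint32_t *out_off)
{
   /* Non-power-of-two-safe alignment, like ALIGN_NPOT above. */
   uint32_t off = (u->next + align - 1) / align * align;

   if (u->buf && off + sz > u->size) {   /* "flush": start a new buffer */
      free(u->buf);
      u->buf = NULL;
   }
   if (!u->buf) {
      u->size = sz > 4096 ? sz : 4096;   /* MAX2(default_size, size) */
      u->buf = malloc(u->size);
      assert(u->buf);
      off = 0;
   }
   u->next = off + sz;
   *out_off = off;
   return u->buf + off;
}

int main(void)
{
   struct toy_uploader u = {0};
   uint32_t off;
   float consts[4] = {0, 1, 2, 3};
   memcpy(toy_upload_space(&u, sizeof(consts), 64, &off), consts,
          sizeof(consts));
   assert(off % 64 == 0);
   free(u.buf);
   return 0;
}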
- * - * References to this memory should not be retained across batch flushes. - */ -void -brw_upload_data(struct brw_uploader *upload, - const void *data, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset) -{ - void *dst = brw_upload_space(upload, size, alignment, out_bo, out_offset); - memcpy(dst, data, size); -} - -void -brw_upload_init(struct brw_uploader *upload, - struct brw_bufmgr *bufmgr, - unsigned default_size) -{ - upload->bufmgr = bufmgr; - upload->bo = NULL; - upload->map = NULL; - upload->next_offset = 0; - upload->default_size = default_size; -} diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c deleted file mode 100644 index 7f9b4cc..0000000 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#define VS 0 -#define GS 1 -#define CLP 2 -#define SF 3 -#define CS 4 - -/** @file brw_urb.c - * - * Manages the division of the URB space between the various fixed-function - * units. - * - * See the Thread Initiation Management section of the GFX4 B-Spec, and - * the individual *_STATE structures for restrictions on numbers of - * entries and threads. - */ - -/* - * Generally, a unit requires a min_nr_entries based on how many entries - * it produces before the downstream unit gets unblocked and can use and - * dereference some of its handles. - * - * The SF unit preallocates a PUE at the start of thread dispatch, and only - * uses that one. So it requires one entry per thread. - * - * For CLIP, the SF unit will hold the previous primitive while the - * next is getting assembled, meaning that linestrips require 3 CLIP VUEs - * (vertices) to ensure continued processing, trifans require 4, and tristrips - * require 5. There can be 1 or 2 threads, and each has the same requirement. - * - * GS has the same requirement as CLIP, but it never handles tristrips, - * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces. - * We only run it single-threaded. - * - * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X). 
- * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
- * get streamed down as soon as threads processing earlier vertices get
- * theirs accepted.
- *
- * Each unit will take the number of URB entries we give it (based on the
- * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
- * and brw_curbe.c for the CURBEs) and decide, in brw_*_state.c, the
- * maximum number of threads it can support based on that.
- *
- * XXX: Are the min_entry_size numbers useful?
- * XXX: Verify min_nr_entries, esp for VS.
- * XXX: Verify SF min_entry_size.
- */
-static const struct {
-   GLuint min_nr_entries;
-   GLuint preferred_nr_entries;
-   GLuint min_entry_size;
-   GLuint max_entry_size;
-} limits[CS+1] = {
-   { 16, 32, 1, 5 },   /* vs */
-   { 4, 8, 1, 5 },     /* gs */
-   { 5, 10, 1, 5 },    /* clp */
-   { 1, 8, 1, 12 },    /* sf */
-   { 1, 4, 1, 32 }     /* cs */
-};
-
-
-static bool check_urb_layout(struct brw_context *brw)
-{
-   brw->urb.vs_start = 0;
-   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
-   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
-   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
-   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
-
-   return brw->urb.cs_start + brw->urb.nr_cs_entries *
-      brw->urb.csize <= brw->urb.size;
-}
-
-/* Minimal update; forces re-emit of the URB fence packet after the GS
- * unit is turned on or off.
- */
-void
-brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
-                        unsigned vsize, unsigned sfsize)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   if (csize < limits[CS].min_entry_size)
-      csize = limits[CS].min_entry_size;
-
-   if (vsize < limits[VS].min_entry_size)
-      vsize = limits[VS].min_entry_size;
-
-   if (sfsize < limits[SF].min_entry_size)
-      sfsize = limits[SF].min_entry_size;
-
-   if (brw->urb.vsize < vsize ||
-       brw->urb.sfsize < sfsize ||
-       brw->urb.csize < csize ||
-       (brw->urb.constrained && (brw->urb.vsize > vsize ||
-                                 brw->urb.sfsize > sfsize ||
-                                 brw->urb.csize > csize))) {
-
-      brw->urb.csize = csize;
-      brw->urb.sfsize = sfsize;
-      brw->urb.vsize = vsize;
-
-      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
-      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
-      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
-      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
-      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
-
-      brw->urb.constrained = 0;
-
-      if (devinfo->ver == 5) {
-         brw->urb.nr_vs_entries = 128;
-         brw->urb.nr_sf_entries = 48;
-         if (check_urb_layout(brw)) {
-            goto done;
-         } else {
-            brw->urb.constrained = 1;
-            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
-            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
-         }
-      } else if (devinfo->verx10 == 45) {
-         brw->urb.nr_vs_entries = 64;
-         if (check_urb_layout(brw)) {
-            goto done;
-         } else {
-            brw->urb.constrained = 1;
-            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
-         }
-      }
-
-      if (!check_urb_layout(brw)) {
-         brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
-         brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
-         brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
-         brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
-         brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
-
-         /* Mark us as operating with constrained nr_entries, so that next
-          * time we recalculate we'll resize the fences in the hope of
-          * escaping constrained mode and getting back to normal
performance. - */ - brw->urb.constrained = 1; - - if (!check_urb_layout(brw)) { - /* This is impossible, given the maximal sizes of urb - * entries and the values for minimum nr of entries - * provided above. - */ - fprintf(stderr, "couldn't calculate URB layout!\n"); - exit(1); - } - - if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF)) - fprintf(stderr, "URB CONSTRAINED\n"); - } - -done: - if (INTEL_DEBUG(DEBUG_URB)) - fprintf(stderr, - "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", - brw->urb.vs_start, - brw->urb.gs_start, - brw->urb.clip_start, - brw->urb.sf_start, - brw->urb.cs_start, - brw->urb.size); - - brw->ctx.NewDriverState |= BRW_NEW_URB_FENCE; - } -} - -static void recalculate_urb_fence( struct brw_context *brw ) -{ - brw_calculate_urb_fence(brw, brw->curbe.total_size, - brw_vue_prog_data(brw->vs.base.prog_data)->urb_entry_size, - brw->sf.prog_data->urb_entry_size); -} - - -const struct brw_tracked_state brw_recalculate_urb_fence = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BLORP | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_SF_PROG_DATA | - BRW_NEW_VS_PROG_DATA, - }, - .emit = recalculate_urb_fence -}; - - - - - -void brw_upload_urb_fence(struct brw_context *brw) -{ - struct brw_urb_fence uf; - memset(&uf, 0, sizeof(uf)); - - uf.header.opcode = CMD_URB_FENCE; - uf.header.length = sizeof(uf)/4-2; - uf.header.vs_realloc = 1; - uf.header.gs_realloc = 1; - uf.header.clp_realloc = 1; - uf.header.sf_realloc = 1; - uf.header.vfe_realloc = 1; - uf.header.cs_realloc = 1; - - /* The ordering below is correct, not the layout in the - * instruction. - * - * There are 256/384 urb reg pairs in total. - */ - uf.bits0.vs_fence = brw->urb.gs_start; - uf.bits0.gs_fence = brw->urb.clip_start; - uf.bits0.clp_fence = brw->urb.sf_start; - uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = brw->urb.size; - - /* erratum: URB_FENCE must not cross a 64byte cacheline */ - if ((USED_BATCH(brw->batch) & 15) > 12) { - int pad = 16 - (USED_BATCH(brw->batch) & 15); - do - *brw->batch.map_next++ = MI_NOOP; - while (--pad); - } - - brw_batch_data(brw, &uf, sizeof(uf)); -} diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c deleted file mode 100644 index 90aff43..0000000 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
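brw_upload_urb_fence() above pads with MI_NOOPs so the 3-dword URB_FENCE packet never crosses a 64-byte (16-dword) cacheline, per the erratum. The arithmetic, as a standalone check mirroring the `(used & 15) > 12` test:

#include <assert.h>

/* Dwords of MI_NOOP padding needed before a 3-dword URB_FENCE so it
 * stays inside one 16-dword cacheline. */
static unsigned
urb_fence_pad(unsigned used_dwords)
{
   return (used_dwords & 15) > 12 ? 16 - (used_dwords & 15) : 0;
}

int main(void)
{
   assert(urb_fence_pad(28) == 0);  /* 28 & 15 == 12: fence fits exactly */
   assert(urb_fence_pad(30) == 2);  /* 30 & 15 == 14: pad to the next line */
   assert(urb_fence_pad(32) == 0);  /* already cacheline-aligned */
   return 0;
}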
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_util.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" - -GLuint brw_translate_blend_equation( GLenum mode ) -{ - switch (mode) { - case GL_FUNC_ADD: - return BRW_BLENDFUNCTION_ADD; - case GL_MIN: - return BRW_BLENDFUNCTION_MIN; - case GL_MAX: - return BRW_BLENDFUNCTION_MAX; - case GL_FUNC_SUBTRACT: - return BRW_BLENDFUNCTION_SUBTRACT; - case GL_FUNC_REVERSE_SUBTRACT: - return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; - default: - unreachable("not reached"); - } -} - -GLuint brw_translate_blend_factor( GLenum factor ) -{ - switch(factor) { - case GL_ZERO: - return BRW_BLENDFACTOR_ZERO; - case GL_SRC_ALPHA: - return BRW_BLENDFACTOR_SRC_ALPHA; - case GL_ONE: - return BRW_BLENDFACTOR_ONE; - case GL_SRC_COLOR: - return BRW_BLENDFACTOR_SRC_COLOR; - case GL_ONE_MINUS_SRC_COLOR: - return BRW_BLENDFACTOR_INV_SRC_COLOR; - case GL_DST_COLOR: - return BRW_BLENDFACTOR_DST_COLOR; - case GL_ONE_MINUS_DST_COLOR: - return BRW_BLENDFACTOR_INV_DST_COLOR; - case GL_ONE_MINUS_SRC_ALPHA: - return BRW_BLENDFACTOR_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BRW_BLENDFACTOR_DST_ALPHA; - case GL_ONE_MINUS_DST_ALPHA: - return BRW_BLENDFACTOR_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: - return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return BRW_BLENDFACTOR_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_COLOR: - return BRW_BLENDFACTOR_INV_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return BRW_BLENDFACTOR_CONST_ALPHA; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return BRW_BLENDFACTOR_INV_CONST_ALPHA; - - case GL_SRC1_COLOR: - return BRW_BLENDFACTOR_SRC1_COLOR; - case GL_SRC1_ALPHA: - return BRW_BLENDFACTOR_SRC1_ALPHA; - case GL_ONE_MINUS_SRC1_COLOR: - return BRW_BLENDFACTOR_INV_SRC1_COLOR; - case GL_ONE_MINUS_SRC1_ALPHA: - return BRW_BLENDFACTOR_INV_SRC1_ALPHA; - - default: - unreachable("not reached"); - } -} - -static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { - [GL_POINTS] =_3DPRIM_POINTLIST, - [GL_LINES] = _3DPRIM_LINELIST, - [GL_LINE_LOOP] = _3DPRIM_LINELOOP, - [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, - [GL_TRIANGLES] = _3DPRIM_TRILIST, - [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [GL_QUADS] = _3DPRIM_QUADLIST, - [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, - [GL_POLYGON] = _3DPRIM_POLYGON, - [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -}; - -uint32_t -get_hw_prim_for_gl_prim(int mode) -{ - assert(mode < ARRAY_SIZE(prim_to_hw_prim)); - return prim_to_hw_prim[mode]; -} diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h deleted file mode 100644 index 095c43a..0000000 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_UTIL_H -#define BRW_UTIL_H - -#include "brw_context.h" -#include "main/framebuffer.h" - -extern GLuint brw_translate_blend_factor( GLenum factor ); -extern GLuint brw_translate_blend_equation( GLenum mode ); - -static inline float -brw_get_line_width(struct brw_context *brw) -{ - /* From the OpenGL 4.4 spec: - * - * "The actual width of non-antialiased lines is determined by rounding - * the supplied width to the nearest integer, then clamping it to the - * implementation-dependent maximum non-antialiased line width." - */ - float line_width = - CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag - ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, - 0.125f, brw->ctx.Const.MaxLineWidth); - - if (!_mesa_is_multisample_enabled(&brw->ctx) && brw->ctx.Line.SmoothFlag && line_width < 1.5f) { - /* For 1 pixel line thickness or less, the general - * anti-aliasing algorithm gives up, and a garbage line is - * generated. Setting a Line Width of 0.0 specifies the - * rasterization of the "thinnest" (one-pixel-wide), - * non-antialiased lines. - * - * Lines rendered with zero Line Width are rasterized using - * Grid Intersection Quantization rules as specified by - * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line - * Rasterization. - */ - line_width = 0.0f; - } - - return line_width; -} - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c deleted file mode 100644 index 1d22c0d..0000000 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. 
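brw_get_line_width() above rounds, clamps, and in the thin-smooth case zeroes the width. Two worked cases under the rules quoted there (the values are made up, multisampling is assumed off, and 7.0 stands in for MaxLineWidth):

#include <assert.h>
#include <math.h>

/* Mirror of the width rule above for the non-multisampled case:
 * round non-smooth widths, clamp to 0.125..max, zero thin smooth lines. */
static float
line_width(float w, float max_w, int smooth)
{
   float lw = smooth ? w : roundf(w);
   lw = fminf(fmaxf(lw, 0.125f), max_w);
   if (smooth && lw < 1.5f)
      lw = 0.0f;   /* fall back to GIQ "cosmetic" line rasterization */
   return lw;
}

int main(void)
{
   assert(line_width(3.4f, 7.0f, 0) == 3.0f); /* rounded, in range */
   assert(line_width(1.2f, 7.0f, 1) == 0.0f); /* smooth + thin -> zero */
   return 0;
}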
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "util/compiler.h" -#include "main/context.h" -#include "brw_context.h" -#include "brw_vs.h" -#include "brw_util.h" -#include "brw_state.h" -#include "program/prog_print.h" -#include "program/prog_parameter.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" - -#include "util/ralloc.h" - -/** - * Decide which set of clip planes should be used when clipping via - * gl_Position or gl_ClipVertex. - */ -gl_clip_plane * -brw_select_clip_planes(struct gl_context *ctx) -{ - if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) { - /* There is currently a GLSL vertex shader, so clip according to GLSL - * rules, which means compare gl_ClipVertex (or gl_Position, if - * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes - * that were stored in EyeUserPlane at the time the clip planes were - * specified. - */ - return ctx->Transform.EyeUserPlane; - } else { - /* Either we are using fixed function or an ARB vertex program. In - * either case the clip planes are going to be compared against - * gl_Position (which is in clip coordinates) so we have to clip using - * _ClipUserPlane, which was transformed into clip coordinates by Mesa - * core. - */ - return ctx->Transform._ClipUserPlane; - } -} - -static GLbitfield64 -brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key, - GLbitfield64 user_varyings) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - GLbitfield64 outputs_written = user_varyings; - - if (devinfo->ver < 6) { - /* Put dummy slots into the VUE for the SF to put the replaced - * point sprite coords in. We shouldn't need these dummy slots, - * which take up precious URB space, but it would mean that the SF - * doesn't get nice aligned pairs of input coords into output - * coords, which would be a pain to handle. - */ - for (unsigned i = 0; i < 8; i++) { - if (key->point_coord_replace & (1 << i)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); - } - - /* if back colors are written, allocate slots for front colors too */ - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); - } - - /* In order for legacy clipping to work, we need to populate the clip - * distance varying slots whenever clipping is enabled, even if the vertex - * shader doesn't write to gl_ClipDistance. 
- */ - if (key->nr_userclip_plane_consts > 0) { - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); - } - - return outputs_written; -} - -static bool -brw_codegen_vs_prog(struct brw_context *brw, - struct brw_program *vp, - struct brw_vs_prog_key *key) -{ - const struct brw_compiler *compiler = brw->screen->compiler; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const GLuint *program; - struct brw_vs_prog_data prog_data; - struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base; - void *mem_ctx; - bool start_busy = false; - double start_time = 0; - - memset(&prog_data, 0, sizeof(prog_data)); - - /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ - if (vp->program.info.is_arb_asm) - stage_prog_data->use_alt_mode = true; - - mem_ctx = ralloc_context(NULL); - - nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir); - - brw_assign_common_binding_table_offsets(devinfo, &vp->program, - &prog_data.base.base, 0); - - if (!vp->program.info.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program, - &prog_data.base.base, - compiler->scalar_stage[MESA_SHADER_VERTEX]); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(compiler, nir, key, - prog_data.base.base.ubo_ranges); - } - } else { - brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program, - &prog_data.base.base); - } - - if (key->nr_userclip_plane_consts > 0) { - brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts, - &prog_data.base.base); - } - - if (key->copy_edgeflag) - nir_lower_passthrough_edgeflags(nir); - - uint64_t outputs_written = - brw_vs_outputs_written(brw, key, nir->info.outputs_written); - - brw_compute_vue_map(devinfo, - &prog_data.base.vue_map, outputs_written, - nir->info.separate_shader, 1); - - if (0) { - _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true); - } - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - brw_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - if (INTEL_DEBUG(DEBUG_VS)) { - if (vp->program.info.is_arb_asm) - brw_dump_arb_asm("vertex", &vp->program); - } - - - /* Emit GFX4 code. - */ - struct brw_compile_vs_params params = { - .nir = nir, - .key = key, - .prog_data = &prog_data, - .log_data = brw, - }; - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - params.shader_time = true; - params.shader_time_index = - brw_get_shader_time_index(brw, &vp->program, ST_VS, - !vp->program.info.is_arb_asm); - } - - program = brw_compile_vs(compiler, mem_ctx, ¶ms); - if (program == NULL) { - if (!vp->program.info.is_arb_asm) { - vp->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&vp->program.sh.data->InfoLog, params.error_str); - } - - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", params.error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (vp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_VERTEX, vp->program.Id, - &key->base); - } - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("VS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - vp->compiled_once = true; - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, &brw->vs.base, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. 
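The nr_userclip_plane_consts count consumed above is derived later in this file as util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1, i.e. one more than the index of the highest enabled plane, so the uploaded constants cover every enabled plane even with gaps in the mask. A standalone check of that arithmetic, with a plain log2 helper standing in for util_logbase2:

#include <assert.h>

/* Index of the highest set bit; same contract as util_logbase2() for
 * nonzero inputs. */
static unsigned
log2_floor(unsigned v)
{
   unsigned r = 0;
   while (v >>= 1)
      r++;
   return r;
}

int main(void)
{
   /* Planes 0 and 2 enabled: must upload consts for planes 0..2. */
   unsigned enabled = (1u << 0) | (1u << 2);
   assert(log2_floor(enabled) + 1 == 3);
   return 0;
}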
*/ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, - key, sizeof(struct brw_vs_prog_key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &brw->vs.base.prog_offset, &brw->vs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -static bool -brw_vs_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_TEXTURE | - _NEW_TRANSFORM, - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_ATTRIB_WORKAROUNDS); -} - -void -brw_vs_populate_key(struct brw_context *brw, - struct brw_vs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *prog = brw->programs[MESA_SHADER_VERTEX]; - struct brw_program *vp = (struct brw_program *) prog; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - memset(key, 0, sizeof(*key)); - - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(ctx, vp, &key->base); - - if (ctx->Transform.ClipPlanesEnabled != 0 && - (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) && - vp->program.info.clip_distance_array_size == 0) { - key->nr_userclip_plane_consts = - util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; - } - - if (devinfo->ver < 6) { - /* _NEW_POLYGON */ - key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - /* _NEW_POINT */ - if (ctx->Point.PointSprite) { - key->point_coord_replace = ctx->Point.CoordReplace & 0xff; - } - } - - if (prog->info.outputs_written & - (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 | - VARYING_BIT_BFC1)) { - /* _NEW_LIGHT | _NEW_BUFFERS */ - key->clamp_vertex_color = ctx->Light._ClampVertexColor; - } - - /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */ - if (devinfo->verx10 <= 70) { - memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags, - sizeof(brw->vb.attrib_wa_flags)); - } -} - -void -brw_upload_vs_prog(struct brw_context *brw) -{ - struct brw_vs_prog_key key; - /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_program *vp = - (struct brw_program *) brw->programs[MESA_SHADER_VERTEX]; - - if (!brw_vs_state_dirty(brw)) - return; - - brw_vs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key), - &brw->vs.base.prog_offset, &brw->vs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_VERTEX)) - return; - - vp = (struct brw_program *) brw->programs[MESA_SHADER_VERTEX]; - vp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_vs_prog(brw, vp, &key); - assert(success); -} - -void -brw_vs_populate_default_key(const struct brw_compiler *compiler, - struct brw_vs_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_program *bvp = brw_program(prog); - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, bvp, &key->base); - - key->clamp_vertex_color = - (prog->info.outputs_written & - (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 | - VARYING_BIT_BFC1)); -} - -bool -brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_vs_prog_key key; - uint32_t old_prog_offset = brw->vs.base.prog_offset; - struct 
brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data; - bool success; - - struct brw_program *bvp = brw_program(prog); - - brw_vs_populate_default_key(brw->screen->compiler, &key, prog); - - success = brw_codegen_vs_prog(brw, bvp, &key); - - brw->vs.base.prog_offset = old_prog_offset; - brw->vs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h deleted file mode 100644 index 207853c..0000000 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_VS_H -#define BRW_VS_H - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -brw_upload_vs_prog(struct brw_context *brw); - -void -brw_vs_populate_key(struct brw_context *brw, - struct brw_vs_prog_key *key); -void -brw_vs_populate_default_key(const struct brw_compiler *compiler, - struct brw_vs_prog_key *key, - struct gl_program *prog); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c deleted file mode 100644 index 6fcf9c5..0000000 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_buffer_objects.h" - - -/* Creates a new VS constant buffer reflecting the current VS program's - * constants, if needed by the VS program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. - */ -static void -brw_upload_vs_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->vs.base; - - /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_program *vp = - (struct brw_program *) brw->programs[MESA_SHADER_VERTEX]; - - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_vs_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_pull_constants, -}; - -static void -brw_upload_vs_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - - /* BRW_NEW_VS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog, &brw->vs.base, brw->vs.base.prog_data); -} - -const struct brw_tracked_state brw_vs_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_UNIFORM_BUFFER | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_ubo_surfaces, -}; - -static void -brw_upload_vs_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; - - if (vp) { - /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, vp, &brw->vs.base, - brw->vs.base.prog_data); - } -} - -const struct brw_tracked_state brw_vs_image_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_image_surfaces, -}; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c deleted file mode 100644 index 5b43093..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ /dev/null @@ -1,639 +0,0 @@ -/* - * Copyright (C) Intel Corp. 2006. All Rights Reserved. - * Intel funded Tungsten Graphics to - * develop this 3D driver. 
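The brw_tracked_state atoms deleted above (VS pull constants, UBO surfaces, image surfaces) all follow one pattern: a pair of dirty-bit masks plus an emit callback that the state-upload loop runs when any listed bit is set. A simplified sketch of that dispatch, using invented flag names and a toy context type rather than the driver's real ones:

    #include <stdint.h>
    #include <stdio.h>

    /* Invented dirty-bit values, for illustration only. */
    #define EX_NEW_PROGRAM (1u << 0)
    #define EX_NEW_BATCH   (1u << 1)

    struct ex_context { uint32_t mesa_dirty; uint32_t brw_dirty; };

    struct ex_tracked_state {
       struct { uint32_t mesa; uint32_t brw; } dirty;
       void (*emit)(struct ex_context *ctx);
    };

    static void emit_ubo_surfaces(struct ex_context *ctx)
    { (void)ctx; printf("re-emitting UBO surface states\n"); }

    static const struct ex_tracked_state ex_ubo_surfaces = {
       .dirty = { .mesa = EX_NEW_PROGRAM, .brw = EX_NEW_BATCH },
       .emit = emit_ubo_surfaces,
    };

    /* The upload loop runs an atom only when one of its dirty bits is set. */
    static void
    run_atom(struct ex_context *ctx, const struct ex_tracked_state *atom)
    {
       if ((ctx->mesa_dirty & atom->dirty.mesa) ||
           (ctx->brw_dirty & atom->dirty.brw))
          atom->emit(ctx);
    }

    int main(void)
    {
       struct ex_context ctx = { .mesa_dirty = EX_NEW_PROGRAM, .brw_dirty = 0 };
       run_atom(&ctx, &ex_ubo_surfaces);   /* fires: the mesa bit matches */
       return 0;
    }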
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include "brw_context.h" -#include "brw_wm.h" -#include "brw_state.h" -#include "main/enums.h" -#include "main/formats.h" -#include "main/fbobject.h" -#include "main/samplerobj.h" -#include "main/framebuffer.h" -#include "program/prog_parameter.h" -#include "program/program.h" -#include "brw_mipmap_tree.h" -#include "brw_image.h" -#include "brw_fbo.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" - -#include "util/ralloc.h" -#include "util/u_math.h" - -static void -assign_fs_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data) -{ - /* Render targets implicitly start at surface index 0. Even if there are - * no color regions, we still perform an FB write to a null render target, - * which will be surface 0. - */ - uint32_t next_binding_table_offset = MAX2(key->nr_color_regions, 1); - - next_binding_table_offset = - brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base, - next_binding_table_offset); - - if (prog->nir->info.outputs_read && !key->coherent_fb_fetch) { - prog_data->binding_table.render_target_read_start = - next_binding_table_offset; - next_binding_table_offset += key->nr_color_regions; - } - - /* Update the binding table size */ - prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4; -} - -static bool -brw_codegen_wm_prog(struct brw_context *brw, - struct brw_program *fp, - struct brw_wm_prog_key *key, - struct brw_vue_map *vue_map) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - void *mem_ctx = ralloc_context(NULL); - struct brw_wm_prog_data prog_data; - const GLuint *program; - bool start_busy = false; - double start_time = 0; - - nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir); - - memset(&prog_data, 0, sizeof(prog_data)); - - /* Use ALT floating point mode for ARB programs so that 0^0 == 1. 
*/ - if (fp->program.info.is_arb_asm) - prog_data.base.use_alt_mode = true; - - assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); - - if (!fp->program.info.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program, - &prog_data.base, true); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir, - NULL, prog_data.base.ubo_ranges); - } - } else { - brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base); - - if (INTEL_DEBUG(DEBUG_WM)) - brw_dump_arb_asm("fragment", &fp->program); - } - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - brw_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - struct brw_compile_fs_params params = { - .nir = nir, - .key = key, - .prog_data = &prog_data, - - .allow_spilling = true, - .vue_map = vue_map, - - .log_data = brw, - }; - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - params.shader_time = true; - params.shader_time_index8 = - brw_get_shader_time_index(brw, &fp->program, ST_FS8, - !fp->program.info.is_arb_asm); - params.shader_time_index16 = - brw_get_shader_time_index(brw, &fp->program, ST_FS16, - !fp->program.info.is_arb_asm); - params.shader_time_index32 = - brw_get_shader_time_index(brw, &fp->program, ST_FS32, - !fp->program.info.is_arb_asm); - } - - program = brw_compile_fs(brw->screen->compiler, mem_ctx, &params); - - if (program == NULL) { - if (!fp->program.info.is_arb_asm) { - fp->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&fp->program.sh.data->InfoLog, params.error_str); - } - - _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", params.error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (fp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id, - &key->base); - } - fp->compiled_once = true; - - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("FS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch); - - if (INTEL_DEBUG(DEBUG_WM) && fp->program.info.is_arb_asm) - fprintf(stderr, "\n"); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.param); - ralloc_steal(NULL, prog_data.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, - key, sizeof(struct brw_wm_prog_key), - program, prog_data.base.program_size, - &prog_data, sizeof(prog_data), - &brw->wm.base.prog_offset, &brw->wm.base.prog_data); - - ralloc_free(mem_ctx); - - return true; -} - -static uint8_t -gfx6_gather_workaround(GLenum internalformat) -{ - switch (internalformat) { - case GL_R8I: return WA_SIGN | WA_8BIT; - case GL_R8UI: return WA_8BIT; - case GL_R16I: return WA_SIGN | WA_16BIT; - case GL_R16UI: return WA_16BIT; - default: - /* Note that even though GL_R32I and GL_R32UI have format overrides in - * the surface state, there is no shader w/a required.
- */ - return 0; - } -} - -static void -brw_populate_sampler_prog_key_data(struct gl_context *ctx, - const struct gl_program *prog, - struct brw_sampler_prog_key_data *key) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - GLbitfield mask = prog->SamplersUsed; - - while (mask) { - const int s = u_bit_scan(&mask); - - key->swizzles[s] = SWIZZLE_NOOP; - key->scale_factors[s] = 0.0f; - - int unit_id = prog->SamplerUnits[s]; - const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id]; - - if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id); - - const bool alpha_depth = t->Attrib.DepthMode == GL_ALPHA && - (img->_BaseFormat == GL_DEPTH_COMPONENT || - img->_BaseFormat == GL_DEPTH_STENCIL); - - /* Haswell handles texture swizzling as surface format overrides - * (except for GL_ALPHA); all other platforms need MOVs in the shader. - */ - if (alpha_depth || (devinfo->verx10 <= 70)) - key->swizzles[s] = brw_get_texture_swizzle(ctx, t); - - if (devinfo->ver < 8 && - sampler->Attrib.MinFilter != GL_NEAREST && - sampler->Attrib.MagFilter != GL_NEAREST) { - if (sampler->Attrib.WrapS == GL_CLAMP) - key->gl_clamp_mask[0] |= 1 << s; - if (sampler->Attrib.WrapT == GL_CLAMP) - key->gl_clamp_mask[1] |= 1 << s; - if (sampler->Attrib.WrapR == GL_CLAMP) - key->gl_clamp_mask[2] |= 1 << s; - } - - /* gather4 for RG32* is broken in multiple ways on Gfx7. */ - if (devinfo->ver == 7 && prog->info.uses_texture_gather) { - switch (img->InternalFormat) { - case GL_RG32I: - case GL_RG32UI: { - /* We have to override the format to R32G32_FLOAT_LD. - * This means that SCS_ALPHA and SCS_ONE will return 0x3f8 - * (1.0) rather than integer 1. This needs shader hacks. - * - * On Ivybridge, we whack W (alpha) to ONE in our key's - * swizzle. On Haswell, we look at the original texture - * swizzle, and use XYZW with channels overridden to ONE, - * leaving normal texture swizzling to SCS. - */ - unsigned src_swizzle = - devinfo->platform == INTEL_PLATFORM_HSW ? - t->Attrib._Swizzle : key->swizzles[s]; - for (int i = 0; i < 4; i++) { - unsigned src_comp = GET_SWZ(src_swizzle, i); - if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) { - key->swizzles[i] &= ~(0x7 << (3 * i)); - key->swizzles[i] |= SWIZZLE_ONE << (3 * i); - } - } - } - FALLTHROUGH; - case GL_RG32F: - /* The channel select for green doesn't work - we have to - * request blue. Haswell can use SCS for this, but Ivybridge - * needs a shader workaround. - */ - if (devinfo->platform != INTEL_PLATFORM_HSW) - key->gather_channel_quirk_mask |= 1 << s; - break; - } - } - - /* Gfx6's gather4 is broken for UINT/SINT; we treat them as - * UNORM/FLOAT instead and fix it in the shader. - */ - if (devinfo->ver == 6 && prog->info.uses_texture_gather) { - key->gfx6_gather_wa[s] = gfx6_gather_workaround(img->InternalFormat); - } - - /* If this is a multisample sampler, and uses the CMS MSAA layout, - * then we need to emit slightly different code to first sample the - * MCS surface. - */ - struct brw_texture_object *intel_tex = - brw_texture_object((struct gl_texture_object *)t); - - /* From gfx9 onwards some single sampled buffers can also be - * compressed. These don't need ld2dms sampling along with mcs fetch. 
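To make the gfx6_gather_workaround() flags above concrete: the compiler uses them to emit fixup arithmetic after the gather, recovering the integer value from the UNORM result the overridden surface format produces. A standalone sketch of that fixup, assuming WA_SIGN/WA_8BIT/WA_16BIT have the bit values 1, 2, and 4; this illustrates the arithmetic, not the compiler's actual codegen:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed flag values, mirroring the deleted driver headers. */
    #define WA_SIGN  1
    #define WA_8BIT  2
    #define WA_16BIT 4

    static int32_t
    gather_fixup(float unorm_val, uint8_t wa)
    {
       if (!wa)
          return (int32_t)unorm_val;  /* 32-bit case: bits reinterpreted, no w/a */

       const int bits = (wa & WA_8BIT) ? 8 : 16;

       /* UNORM sampling returned i / (2^bits - 1); undo the normalization. */
       int32_t v = (int32_t)(unorm_val * (float)((1 << bits) - 1) + 0.5f);

       /* For SINT formats, sign-extend the low 'bits' bits. */
       if ((wa & WA_SIGN) && v >= (1 << (bits - 1)))
          v -= 1 << bits;

       return v;
    }

    int main(void)
    {
       /* 0xff sampled through R8_UNORM comes back as 1.0f; as R8I it is -1. */
       printf("%d\n", gather_fixup(1.0f, WA_SIGN | WA_8BIT));
       return 0;
    }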
- */ - if (intel_tex->mt->aux_usage == ISL_AUX_USAGE_MCS) { - assert(devinfo->ver >= 7); - assert(intel_tex->mt->surf.samples > 1); - assert(intel_tex->mt->aux_buf); - assert(intel_tex->mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); - key->compressed_multisample_layout_mask |= 1 << s; - - if (intel_tex->mt->surf.samples >= 16) { - assert(devinfo->ver >= 9); - key->msaa_16 |= 1 << s; - } - } - - if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) { - - /* Setup possible scaling factor. */ - key->scale_factors[s] = intel_tex->planar_format->scaling_factor; - - switch (intel_tex->planar_format->components) { - case __DRI_IMAGE_COMPONENTS_Y_UV: - key->y_uv_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_Y_U_V: - key->y_u_v_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_Y_XUXV: - key->yx_xuxv_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_Y_UXVX: - key->xy_uxvx_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_AYUV: - key->ayuv_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_XYUV: - key->xyuv_image_mask |= 1 << s; - break; - default: - break; - } - - switch (intel_tex->yuv_color_space) { - case __DRI_YUV_COLOR_SPACE_ITU_REC709: - key->bt709_mask |= 1 << s; - break; - case __DRI_YUV_COLOR_SPACE_ITU_REC2020: - key->bt2020_mask |= 1 << s; - break; - default: - break; - } - } - - } - } -} - -void -brw_populate_base_prog_key(struct gl_context *ctx, - const struct brw_program *prog, - struct brw_base_prog_key *key) -{ - key->program_string_id = prog->id; - key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM; - brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex); -} - -void -brw_populate_default_base_prog_key(const struct intel_device_info *devinfo, - const struct brw_program *prog, - struct brw_base_prog_key *key) -{ - key->program_string_id = prog->id; - key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM; - brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program); -} - -static bool -brw_wm_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_COLOR | - _NEW_DEPTH | - _NEW_FRAG_CLAMP | - _NEW_HINT | - _NEW_LIGHT | - _NEW_LINE | - _NEW_MULTISAMPLE | - _NEW_POLYGON | - _NEW_STENCIL | - _NEW_TEXTURE, - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_STATS_WM | - BRW_NEW_VUE_MAP_GEOM_OUT); -} - -void -brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *prog = brw->programs[MESA_SHADER_FRAGMENT]; - const struct brw_program *fp = brw_program_const(prog); - GLuint lookup = 0; - GLuint line_aa; - - memset(key, 0, sizeof(*key)); - - /* Build the index for table lookup - */ - if (devinfo->ver < 6) { - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - - /* _NEW_COLOR */ - if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) { - lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; - } - - if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT; - } - - /* _NEW_DEPTH */ - if (depth_irb && ctx->Depth.Test) { - lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT; - - if (brw_depth_writes_enabled(brw)) - lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT; - } - - /* _NEW_STENCIL | _NEW_BUFFERS */ - if (brw->stencil_enabled) { - lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT; - - if 
(ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT; - } - key->iz_lookup = lookup; - } - - line_aa = BRW_WM_AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (ctx->Line.SmoothFlag) { - if (brw->reduced_primitive == GL_LINES) { - line_aa = BRW_WM_AA_ALWAYS; - } - else if (brw->reduced_primitive == GL_TRIANGLES) { - if (ctx->Polygon.FrontMode == GL_LINE) { - line_aa = BRW_WM_AA_SOMETIMES; - - if (ctx->Polygon.BackMode == GL_LINE || - (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_BACK)) - line_aa = BRW_WM_AA_ALWAYS; - } - else if (ctx->Polygon.BackMode == GL_LINE) { - line_aa = BRW_WM_AA_SOMETIMES; - - if ((ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT)) - line_aa = BRW_WM_AA_ALWAYS; - } - } - } - - key->line_aa = line_aa; - - /* _NEW_HINT */ - key->high_quality_derivatives = - prog->info.uses_fddx_fddy && - ctx->Hint.FragmentShaderDerivative == GL_NICEST; - - if (devinfo->ver < 6) - key->stats_wm = brw->stats_wm; - - /* _NEW_LIGHT */ - key->flat_shade = - (prog->info.inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)) && - (ctx->Light.ShadeModel == GL_FLAT); - - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ - key->clamp_fragment_color = ctx->Color._ClampFragmentColor; - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(ctx, fp, &key->base); - - /* _NEW_BUFFERS */ - key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; - - /* _NEW_COLOR */ - key->force_dual_color_blend = brw->dual_color_blend_by_location && - (ctx->Color.BlendEnabled & 1) && ctx->Color._BlendUsesDualSrc & 0x1; - - /* _NEW_MULTISAMPLE, _NEW_BUFFERS */ - key->alpha_to_coverage = _mesa_is_alpha_to_coverage_enabled(ctx); - - /* _NEW_COLOR, _NEW_BUFFERS */ - key->alpha_test_replicate_alpha = - ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - _mesa_is_alpha_test_enabled(ctx); - - /* _NEW_BUFFERS _NEW_MULTISAMPLE */ - /* Ignore sample qualifier while computing this flag. */ - if (ctx->Multisample.Enabled) { - key->persample_interp = - ctx->Multisample.SampleShading && - (ctx->Multisample.MinSampleShadingValue * - _mesa_geometric_samples(ctx->DrawBuffer) > 1); - - key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - } - - key->ignore_sample_mask_out = !key->multisample_fbo; - - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read & - BRW_FS_VARYING_INPUT_MASK) > 16) { - key->input_slots_valid = brw->vue_map_geom_out.slots_valid; - } - - /* _NEW_COLOR | _NEW_BUFFERS */ - /* Pre-gfx6, the hardware alpha test always used each render - * target's alpha to do alpha test, as opposed to render target 0's alpha - * like GL requires. Fix that by building the alpha test into the - * shader, and we'll skip enabling the fixed function alpha test. - */ - if (devinfo->ver < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - ctx->Color.AlphaEnabled) { - key->alpha_test_func = ctx->Color.AlphaFunc; - key->alpha_test_ref = ctx->Color.AlphaRef; - } - - /* Whether reads from the framebuffer should behave coherently. 
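The line_aa computation above is a small decision tree over line smoothing, the reduced primitive, the polygon modes, and culling. Restated as a self-contained function with stand-in enums (the logic mirrors the deleted code; the names are not the driver's):

    #include <stdbool.h>
    #include <stdio.h>

    enum aa_mode { AA_NEVER, AA_SOMETIMES, AA_ALWAYS };
    enum gl_mode { MODE_FILL, MODE_LINE };   /* stand-ins for GL_FILL/GL_LINE */
    enum prim    { PRIM_POINTS, PRIM_LINES, PRIM_TRIANGLES };
    enum face    { CULL_FRONT, CULL_BACK };

    static enum aa_mode
    compute_line_aa(bool smooth, enum prim reduced_prim,
                    enum gl_mode front, enum gl_mode back,
                    bool cull, enum face cull_face)
    {
       if (!smooth)
          return AA_NEVER;
       if (reduced_prim == PRIM_LINES)
          return AA_ALWAYS;                  /* real lines: always AA */
       if (reduced_prim != PRIM_TRIANGLES)
          return AA_NEVER;

       /* Triangles only produce AA lines when a face is drawn in line mode;
        * it becomes "always" once every rasterized face is a line. */
       if (front == MODE_LINE) {
          if (back == MODE_LINE || (cull && cull_face == CULL_BACK))
             return AA_ALWAYS;
          return AA_SOMETIMES;
       }
       if (back == MODE_LINE)
          return (cull && cull_face == CULL_FRONT) ? AA_ALWAYS : AA_SOMETIMES;
       return AA_NEVER;
    }

    int main(void)
    {
       /* Front faces in line mode, back faces culled: always AA (prints 2). */
       printf("%d\n", compute_line_aa(true, PRIM_TRIANGLES,
                                      MODE_LINE, MODE_FILL, true, CULL_BACK));
       return 0;
    }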
*/ - key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch; -} - -void -brw_upload_wm_prog(struct brw_context *brw) -{ - struct brw_wm_prog_key key; - struct brw_program *fp = - (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; - - if (!brw_wm_state_dirty(brw)) - return; - - brw_wm_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, &key, sizeof(key), - &brw->wm.base.prog_offset, &brw->wm.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_FRAGMENT)) - return; - - fp = (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; - fp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_wm_prog(brw, fp, &key, - &brw->vue_map_geom_out); - assert(success); -} - -void -brw_wm_populate_default_key(const struct brw_compiler *compiler, - struct brw_wm_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, brw_program(prog), - &key->base); - - uint64_t outputs_written = prog->info.outputs_written; - - if (devinfo->ver < 6) { - if (prog->info.fs.uses_discard) - key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; - - if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - key->iz_lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT; - - /* Just assume depth testing. */ - key->iz_lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT; - key->iz_lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT; - } - - if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read & - BRW_FS_VARYING_INPUT_MASK) > 16) { - key->input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS; - } - - key->nr_color_regions = util_bitcount64(outputs_written & - ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | - BITFIELD64_BIT(FRAG_RESULT_STENCIL) | - BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); - - /* Whether reads from the framebuffer should behave coherently. */ - key->coherent_fb_fetch = devinfo->ver >= 9; -} - -bool -brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_wm_prog_key key; - - struct brw_program *bfp = brw_program(prog); - - brw_wm_populate_default_key(brw->screen->compiler, &key, prog); - - /* check brw_wm_populate_default_key coherent_fb_fetch setting */ - assert(key.coherent_fb_fetch == - ctx->Extensions.EXT_shader_framebuffer_fetch); - - uint32_t old_prog_offset = brw->wm.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data; - - struct brw_vue_map vue_map; - if (devinfo->ver < 6) { - brw_compute_vue_map(&brw->screen->devinfo, &vue_map, - prog->info.inputs_read | VARYING_BIT_POS, - false, 1); - } - - bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map); - - brw->wm.base.prog_offset = old_prog_offset; - brw->wm.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h deleted file mode 100644 index 86980c3..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
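brw_upload_wm_prog() above shows the lookup order every stage uses: bail out if no relevant state is dirty, then try the in-memory program cache, then the disk cache, and compile only on a full miss. A schematic of that control flow; all of the helpers here are stand-in stubs, not the driver's real API:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in helpers: each returns true when it satisfied the request. */
    static bool state_dirty(void)         { return true;  }
    static bool search_memory_cache(void) { return false; }
    static bool search_disk_cache(void)   { return false; }
    static bool codegen(void)             { printf("compiling\n"); return true; }

    static void upload_prog(void)
    {
       if (!state_dirty())
          return;              /* nothing changed: keep the current program */
       if (search_memory_cache())
          return;              /* hit: prog_offset/prog_data now point at it */
       if (search_disk_cache())
          return;              /* hit: binary re-uploaded from the disk cache */
       bool ok = codegen();    /* miss everywhere: compile and cache */
       (void)ok;               /* the driver asserts success at this point */
    }

    int main(void) { upload_prog(); return 0; }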
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_WM_H -#define BRW_WM_H - -#include <stdbool.h> - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -brw_upload_wm_prog(struct brw_context *brw); - -void -brw_wm_populate_key(struct brw_context *brw, - struct brw_wm_prog_key *key); -void -brw_wm_populate_default_key(const struct brw_compiler *compiler, - struct brw_wm_prog_key *key, - struct gl_program *prog); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c deleted file mode 100644 index 08e9009..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ /dev/null @@ -1,1692 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "compiler/nir/nir.h" -#include "main/context.h" -#include "main/blend.h" -#include "main/mtypes.h" -#include "main/samplerobj.h" -#include "main/shaderimage.h" -#include "main/teximage.h" -#include "program/prog_parameter.h" -#include "program/prog_instruction.h" -#include "main/framebuffer.h" -#include "main/shaderapi.h" - -#include "isl/isl.h" - -#include "brw_mipmap_tree.h" -#include "brw_batch.h" -#include "brw_tex.h" -#include "brw_fbo.h" -#include "brw_buffer_objects.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_wm.h" - -static void -get_isl_surf(struct brw_context *brw, struct brw_mipmap_tree *mt, - GLenum target, struct isl_view *view, - uint32_t *tile_x, uint32_t *tile_y, - uint32_t *offset, struct isl_surf *surf) -{ - *surf = mt->surf; - - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const enum isl_dim_layout dim_layout = - get_isl_dim_layout(devinfo, mt->surf.tiling, target); - - surf->dim = get_isl_surf_dim(target); - - if (surf->dim_layout == dim_layout) - return; - - /* The layout of the specified texture target is not compatible with the - * actual layout of the miptree structure in memory -- You're entering - * dangerous territory, this can only possibly work if you only intended - * to access a single level and slice of the texture, and the hardware - * supports the tile offset feature in order to allow non-tile-aligned - * base offsets, since we'll have to point the hardware to the first - * texel of the level instead of relying on the usual base level/layer - * controls. - */ - assert(devinfo->has_surface_tile_offset); - assert(view->levels == 1 && view->array_len == 1); - assert(*tile_x == 0 && *tile_y == 0); - - *offset += brw_miptree_get_tile_offsets(mt, view->base_level, - view->base_array_layer, - tile_x, tile_y); - - /* Minify the logical dimensions of the texture. */ - const unsigned l = view->base_level - mt->first_level; - surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l); - surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 : - minify(surf->logical_level0_px.height, l); - surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 : - minify(surf->logical_level0_px.depth, l); - - /* Only the base level and layer can be addressed with the overridden - * layout. - */ - surf->logical_level0_px.array_len = 1; - surf->levels = 1; - surf->dim_layout = dim_layout; - - /* The requested slice of the texture is now at the base level and - * layer. 
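The logical-dimension minification above is the usual mipmap rule: each level halves the previous one and never drops below one texel. A local restatement of the minify() helper the deleted code calls, with a few sanity checks:

    #include <assert.h>
    #include <stdio.h>

    static unsigned minify(unsigned base, unsigned level)
    {
       unsigned v = base >> level;   /* halve once per level ... */
       return v ? v : 1;             /* ... but never below one texel */
    }

    int main(void)
    {
       /* A 37-texel-wide level 0 gives levels of 37, 18, 9, 4, 2, 1, 1, ... */
       assert(minify(37, 1) == 18);
       assert(minify(37, 5) == 1);
       assert(minify(37, 9) == 1);
       printf("ok\n");
       return 0;
    }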
- */ - view->base_level = 0; - view->base_array_layer = 0; -} - -static void -brw_emit_surface_state(struct brw_context *brw, - struct brw_mipmap_tree *mt, - GLenum target, struct isl_view view, - enum isl_aux_usage aux_usage, - uint32_t *surf_offset, int surf_index, - unsigned reloc_flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t tile_x = mt->level[0].level_x; - uint32_t tile_y = mt->level[0].level_y; - uint32_t offset = mt->offset; - - struct isl_surf surf; - - get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf); - - union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; - - struct brw_bo *aux_bo = NULL; - struct isl_surf *aux_surf = NULL; - uint64_t aux_offset = 0; - struct brw_bo *clear_bo = NULL; - uint64_t clear_offset = 0; - - if (aux_usage != ISL_AUX_USAGE_NONE) { - aux_surf = &mt->aux_buf->surf; - aux_bo = mt->aux_buf->bo; - aux_offset = mt->aux_buf->offset; - - /* We only really need a clear color if we also have an auxiliary - * surface. Without one, it does nothing. - */ - clear_color = brw_miptree_get_clear_color(mt, &clear_bo, &clear_offset); - } - - void *state = brw_state_batch(brw, - brw->isl_dev.ss.size, - brw->isl_dev.ss.align, - surf_offset); - - isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view, - .address = brw_state_reloc(&brw->batch, - *surf_offset + brw->isl_dev.ss.addr_offset, - mt->bo, offset, reloc_flags), - .aux_surf = aux_surf, .aux_usage = aux_usage, - .aux_address = aux_offset, - .mocs = brw_mocs(&brw->isl_dev, mt->bo), - .clear_color = clear_color, - .use_clear_address = clear_bo != NULL, - .clear_address = clear_offset, - .x_offset_sa = tile_x, .y_offset_sa = tile_y); - if (aux_surf) { - /* On gfx7 and prior, the upper 20 bits of surface state DWORD 6 are the - * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits - * contain other control information. Since buffer addresses are always - * on 4k boundaries (and thus have their lower 12 bits zero), we can use - * an ordinary reloc to do the necessary address translation. - * - * FIXME: move to the point of assignment. - */ - assert((aux_offset & 0xfff) == 0); - - if (devinfo->ver >= 8) { - uint64_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; - *aux_addr = brw_state_reloc(&brw->batch, - *surf_offset + - brw->isl_dev.ss.aux_addr_offset, - aux_bo, *aux_addr, - reloc_flags); - } else { - uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; - *aux_addr = brw_state_reloc(&brw->batch, - *surf_offset + - brw->isl_dev.ss.aux_addr_offset, - aux_bo, *aux_addr, - reloc_flags); - - } - } - - if (clear_bo != NULL) { - /* Make sure the offset is aligned with a cacheline. 
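The relocation trick described above works because the MCS address and the control bits share one dword: the aux buffer is 4 KiB aligned, so its low 12 bits are zero and the control information can simply be OR'd in. A sketch of that packing:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pack a 4 KiB-aligned aux-buffer address together with 12 bits of
     * control information, as surface-state DWORD 6 does on gfx7 and prior. */
    static uint32_t pack_aux_dword(uint32_t aux_addr, uint32_t ctrl_bits)
    {
       assert((aux_addr & 0xfff) == 0);   /* address must be 4 KiB aligned */
       assert(ctrl_bits <= 0xfff);        /* control info fits in the low 12 bits */
       return aux_addr | ctrl_bits;       /* a plain OR: the fields never overlap */
    }

    int main(void)
    {
       uint32_t dw6 = pack_aux_dword(0x12345000u, 0x5au);
       printf("%#x\n", dw6);              /* prints 0x1234505a */
       return 0;
    }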
*/ - assert((clear_offset & 0x3f) == 0); - uint64_t *clear_address = - state + brw->isl_dev.ss.clear_color_state_offset; - *clear_address = brw_state_reloc(&brw->batch, - *surf_offset + - brw->isl_dev.ss.clear_color_state_offset, - clear_bo, *clear_address, reloc_flags); - } -} - -static uint32_t -gfx6_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - unsigned unit, - uint32_t surf_index) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_mipmap_tree *mt = irb->mt; - - assert(brw_render_target_supported(brw, rb)); - - mesa_format rb_format = _mesa_get_render_format(ctx, brw_rb_format(irb)); - if (unlikely(!brw->mesa_format_supports_render[rb_format])) { - _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __func__, _mesa_get_format_name(rb_format)); - } - enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format]; - - struct isl_view view = { - .format = isl_format, - .base_level = irb->mt_level - irb->mt->first_level, - .levels = 1, - .base_array_layer = irb->mt_layer, - .array_len = MAX2(irb->layer_count, 1), - .swizzle = ISL_SWIZZLE_IDENTITY, - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, - }; - - uint32_t offset; - brw_emit_surface_state(brw, mt, mt->target, view, - brw->draw_aux_usage[unit], - &offset, surf_index, - RELOC_WRITE); - return offset; -} - -GLuint -translate_tex_target(GLenum target) -{ - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY_EXT: - return BRW_SURFACE_1D; - - case GL_TEXTURE_RECTANGLE_NV: - return BRW_SURFACE_2D; - - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY_EXT: - case GL_TEXTURE_EXTERNAL_OES: - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - return BRW_SURFACE_2D; - - case GL_TEXTURE_3D: - return BRW_SURFACE_3D; - - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - return BRW_SURFACE_CUBE; - - default: - unreachable("not reached"); - } -} - -uint32_t -brw_get_surface_tiling_bits(enum isl_tiling tiling) -{ - switch (tiling) { - case ISL_TILING_X: - return BRW_SURFACE_TILED; - case ISL_TILING_Y0: - return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; - default: - return 0; - } -} - - -uint32_t -brw_get_surface_num_multisamples(unsigned num_samples) -{ - if (num_samples > 1) - return BRW_SURFACE_MULTISAMPLECOUNT_4; - else - return BRW_SURFACE_MULTISAMPLECOUNT_1; -} - -/** - * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle - * swizzling. - */ -int -brw_get_texture_swizzle(const struct gl_context *ctx, - const struct gl_texture_object *t) -{ - const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel]; - - int swizzles[SWIZZLE_NIL + 1] = { - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - SWIZZLE_ZERO, - SWIZZLE_ONE, - SWIZZLE_NIL - }; - - if (img->_BaseFormat == GL_DEPTH_COMPONENT || - img->_BaseFormat == GL_DEPTH_STENCIL) { - GLenum depth_mode = t->Attrib.DepthMode; - - /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures - * with depth component data specified with a sized internal format. - * Otherwise, it's left at the old default, GL_LUMINANCE. 
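Summarizing the DEPTH_TEXTURE_MODE handling in brw_get_texture_swizzle(): the single depth value, read into X, fans out to RGBA according to the depth mode, and the result is packed three bits per channel. A compact restatement; the selector codes and packing macros below are re-declared to match their definitions in Mesa's prog_instruction.h:

    #include <stdio.h>

    /* Per-channel selector codes and the 3-bit packing. */
    enum { SWZ_X = 0, SWZ_Y = 1, SWZ_Z = 2, SWZ_W = 3, SWZ_ZERO = 4, SWZ_ONE = 5 };
    #define MAKE_SWIZZLE4(a, b, c, d) ((a) | ((b) << 3) | ((c) << 6) | ((d) << 9))
    #define GET_SWZ(swz, i)           (((swz) >> ((i) * 3)) & 0x7)

    /* DEPTH_TEXTURE_MODE: how the one depth value (in X) fans out to RGBA. */
    static unsigned
    depth_mode_swizzle(int mode /* 0=ALPHA 1=LUMINANCE 2=INTENSITY 3=RED */)
    {
       switch (mode) {
       case 0:  return MAKE_SWIZZLE4(SWZ_ZERO, SWZ_ZERO, SWZ_ZERO, SWZ_X);
       case 1:  return MAKE_SWIZZLE4(SWZ_X,    SWZ_X,    SWZ_X,    SWZ_ONE);
       case 2:  return MAKE_SWIZZLE4(SWZ_X,    SWZ_X,    SWZ_X,    SWZ_X);
       default: return MAKE_SWIZZLE4(SWZ_X,    SWZ_ZERO, SWZ_ZERO, SWZ_ONE);
       }
    }

    int main(void)
    {
       unsigned swz = depth_mode_swizzle(1);   /* GL_LUMINANCE */
       for (int i = 0; i < 4; i++)
          printf("channel %d -> code %d\n", i, GET_SWZ(swz, i));
       return 0;
    }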
- */ - if (_mesa_is_gles3(ctx) && - img->InternalFormat != GL_DEPTH_COMPONENT && - img->InternalFormat != GL_DEPTH_STENCIL) { - depth_mode = GL_RED; - } - - switch (depth_mode) { - case GL_ALPHA: - swizzles[0] = SWIZZLE_ZERO; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_X; - break; - case GL_LUMINANCE: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_ONE; - break; - case GL_INTENSITY: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_X; - break; - case GL_RED: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_ONE; - break; - } - } - - GLenum datatype = _mesa_get_format_datatype(img->TexFormat); - - /* If the texture's format is alpha-only, force R, G, and B to - * 0.0. Similarly, if the texture's format has no alpha channel, - * force the alpha value read to 1.0. This allows for the - * implementation to use an RGBA texture for any of these formats - * without leaking any unexpected values. - */ - switch (img->_BaseFormat) { - case GL_ALPHA: - swizzles[0] = SWIZZLE_ZERO; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - break; - case GL_LUMINANCE: - if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_ONE; - } - break; - case GL_LUMINANCE_ALPHA: - if (datatype == GL_SIGNED_NORMALIZED) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_W; - } - break; - case GL_INTENSITY: - if (datatype == GL_SIGNED_NORMALIZED) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_X; - } - break; - case GL_RED: - if (img->TexFormat == MESA_FORMAT_R_SRGB8) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_ONE; - break; - } - FALLTHROUGH; - case GL_RG: - case GL_RGB: - if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 || - img->TexFormat == MESA_FORMAT_RGB_DXT1 || - img->TexFormat == MESA_FORMAT_SRGB_DXT1) - swizzles[3] = SWIZZLE_ONE; - break; - } - - return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->Attrib._Swizzle, 0)], - swizzles[GET_SWZ(t->Attrib._Swizzle, 1)], - swizzles[GET_SWZ(t->Attrib._Swizzle, 2)], - swizzles[GET_SWZ(t->Attrib._Swizzle, 3)]); -} - -/** - * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gfx7.5+ - * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are - * - * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE - * 0 1 2 3 4 5 - * 4 5 6 7 0 1 - * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE - * - * which is simply adding 4 then modding by 8 (or anding with 7). - * - * We then may need to apply workarounds for textureGather hardware bugs. - */ -static unsigned -swizzle_to_scs(GLenum swizzle, bool need_green_to_blue) -{ - unsigned scs = (swizzle + 4) & 7; - - return (need_green_to_blue && scs == HSW_SCS_GREEN) ?
HSW_SCS_BLUE : scs; -} - -static void brw_update_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset, - bool for_gather, - bool for_txf, - uint32_t plane) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; - - if (obj->Target == GL_TEXTURE_BUFFER) { - brw_update_buffer_texture_surface(ctx, unit, surf_offset); - - } else { - struct brw_texture_object *intel_obj = brw_texture_object(obj); - struct brw_mipmap_tree *mt = intel_obj->mt; - - if (plane > 0) { - if (mt->plane[plane - 1] == NULL) - return; - mt = mt->plane[plane - 1]; - } - - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - /* If this is a view with restricted NumLayers, then our effective depth - * is not just the miptree depth. - */ - unsigned view_num_layers; - if (obj->Immutable && obj->Target != GL_TEXTURE_3D) { - view_num_layers = obj->Attrib.NumLayers; - } else { - view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ? - mt->surf.logical_level0_px.depth : - mt->surf.logical_level0_px.array_len; - } - - /* Handling GL_ALPHA as a surface format override breaks 1.30+ style - * texturing functions that return a float, as our code generation always - * selects the .x channel (which would always be 0). - */ - struct gl_texture_image *firstImage = obj->Image[0][obj->Attrib.BaseLevel]; - const bool alpha_depth = obj->Attrib.DepthMode == GL_ALPHA && - (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || - firstImage->_BaseFormat == GL_DEPTH_STENCIL); - const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW : - brw_get_texture_swizzle(&brw->ctx, obj)); - - mesa_format mesa_fmt; - if (firstImage->_BaseFormat == GL_DEPTH_STENCIL || - firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { - /* The format from intel_obj may be a combined depth stencil format - * when we just want depth. Pull it from the miptree instead. This - * is safe because texture views aren't allowed on depth/stencil. - */ - mesa_fmt = mt->format; - } else if (brw_miptree_has_etc_shadow(brw, mt)) { - mesa_fmt = mt->shadow_mt->format; - } else if (plane > 0) { - mesa_fmt = mt->format; - } else { - mesa_fmt = intel_obj->_Format; - } - enum isl_format format = translate_tex_format(brw, mesa_fmt, - for_txf ? GL_DECODE_EXT : - sampler->Attrib.sRGBDecode); - - /* Implement gfx6 and gfx7 gather work-around */ - bool need_green_to_blue = false; - if (for_gather) { - if (devinfo->ver == 7 && (format == ISL_FORMAT_R32G32_FLOAT || - format == ISL_FORMAT_R32G32_SINT || - format == ISL_FORMAT_R32G32_UINT)) { - format = ISL_FORMAT_R32G32_FLOAT_LD; - need_green_to_blue = devinfo->platform == INTEL_PLATFORM_HSW; - } else if (devinfo->ver == 6) { - /* Sandybridge's gather4 message is broken for integer formats. - * To work around this, we pretend the surface is UNORM for - * 8 or 16-bit formats, and emit shader instructions to recover - * the real INT/UINT value. For 32-bit formats, we pretend - * the surface is FLOAT, and simply reinterpret the resulting - * bits. 
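The (swizzle + 4) & 7 trick in swizzle_to_scs() above works because the shader-channel-select encodings are the swizzle encodings rotated by four, exactly as the mapping table in the deleted comment shows. A quick standalone check of that claim (the SCS_* values follow that table):

    #include <assert.h>
    #include <stdio.h>

    enum { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_ZERO, SWZ_ONE };      /* 0..5 */
    enum { SCS_ZERO = 0, SCS_ONE = 1,
           SCS_RED = 4, SCS_GREEN = 5, SCS_BLUE = 6, SCS_ALPHA = 7 };

    static unsigned swizzle_to_scs(unsigned swizzle)
    {
       return (swizzle + 4) & 7;   /* add 4, wrap modulo 8 */
    }

    int main(void)
    {
       assert(swizzle_to_scs(SWZ_X)    == SCS_RED);
       assert(swizzle_to_scs(SWZ_W)    == SCS_ALPHA);
       assert(swizzle_to_scs(SWZ_ZERO) == SCS_ZERO);
       assert(swizzle_to_scs(SWZ_ONE)  == SCS_ONE);
       printf("mapping holds\n");
       return 0;
    }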
- */ - switch (format) { - case ISL_FORMAT_R8_SINT: - case ISL_FORMAT_R8_UINT: - format = ISL_FORMAT_R8_UNORM; - break; - - case ISL_FORMAT_R16_SINT: - case ISL_FORMAT_R16_UINT: - format = ISL_FORMAT_R16_UNORM; - break; - - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_UINT: - format = ISL_FORMAT_R32_FLOAT; - break; - - default: - break; - } - } - } - - if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) { - if (devinfo->ver <= 7) { - assert(mt->shadow_mt && !mt->stencil_mt->shadow_needs_update); - mt = mt->shadow_mt; - } else { - mt = mt->stencil_mt; - } - format = ISL_FORMAT_R8_UINT; - } else if (devinfo->ver <= 7 && mt->format == MESA_FORMAT_S_UINT8) { - assert(mt->shadow_mt && !mt->shadow_needs_update); - mt = mt->shadow_mt; - format = ISL_FORMAT_R8_UINT; - } else if (brw_miptree_needs_fake_etc(brw, mt)) { - assert(mt->shadow_mt && !mt->shadow_needs_update); - mt = mt->shadow_mt; - } - - const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; - - struct isl_view view = { - .format = format, - .base_level = obj->Attrib.MinLevel + obj->Attrib.BaseLevel, - .levels = intel_obj->_MaxLevel - obj->Attrib.BaseLevel + 1, - .base_array_layer = obj->Attrib.MinLayer, - .array_len = view_num_layers, - .swizzle = { - .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue), - .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue), - .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue), - .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue), - }, - .usage = ISL_SURF_USAGE_TEXTURE_BIT, - }; - - /* On Ivy Bridge and earlier, we handle texture swizzle with shader - * code. The actual surface swizzle should be identity. - */ - if (devinfo->verx10 <= 70) - view.swizzle = ISL_SWIZZLE_IDENTITY; - - if (obj->Target == GL_TEXTURE_CUBE_MAP || - obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) - view.usage |= ISL_SURF_USAGE_CUBE_BIT; - - enum isl_aux_usage aux_usage = - brw_miptree_texture_aux_usage(brw, mt, format, - brw->gfx9_astc5x5_wa_tex_mask); - - brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, - surf_offset, surf_index, - 0); - } -} - -void -brw_emit_buffer_surface_state(struct brw_context *brw, - uint32_t *out_offset, - struct brw_bo *bo, - unsigned buffer_offset, - enum isl_format format, - unsigned buffer_size, - unsigned pitch, - unsigned reloc_flags) -{ - uint32_t *dw = brw_state_batch(brw, - brw->isl_dev.ss.size, - brw->isl_dev.ss.align, - out_offset); - - isl_buffer_fill_state(&brw->isl_dev, dw, - .address = !bo ? buffer_offset : - brw_state_reloc(&brw->batch, - *out_offset + brw->isl_dev.ss.addr_offset, - bo, buffer_offset, - reloc_flags), - .size_B = buffer_size, - .format = format, - .swizzle = ISL_SWIZZLE_IDENTITY, - .stride_B = pitch, - .mocs = brw_mocs(&brw->isl_dev, bo)); -} - -static unsigned -buffer_texture_range_size(struct brw_context *brw, - struct gl_texture_object *obj) -{ - assert(obj->Target == GL_TEXTURE_BUFFER); - const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat); - const unsigned buffer_size = (!obj->BufferObject ? 0 : - obj->BufferObject->Size); - const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset); - - /* The ARB_texture_buffer_specification says: - * - * "The number of texels in the buffer texture's texel array is given by - * - * floor(<buffer_size> / (<components> * sizeof(<base_type>))), - * - * where <buffer_size> is the size of the buffer object, in basic - * machine units and <components> and <base_type> are the element count - * and base data type for elements, as specified in Table X.1.
The - * number of texels in the texel array is then clamped to the - * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." - * - * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, - * so that when ISL divides by stride to obtain the number of texels, that - * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. - */ - return MIN3((unsigned)obj->BufferSize, - buffer_size - buffer_offset, - brw->ctx.Const.MaxTextureBufferSize * texel_size); -} - -static void -emit_null_surface_state(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t *out_offset); - -void -brw_update_buffer_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct brw_buffer_object *intel_obj = - brw_buffer_object(tObj->BufferObject); - const unsigned size = buffer_texture_range_size(brw, tObj); - struct brw_bo *bo = NULL; - mesa_format format = tObj->_BufferObjectFormat; - const enum isl_format isl_format = brw_isl_format_for_mesa_format(format); - int texel_size = _mesa_get_format_bytes(format); - - if (tObj->BufferObject == NULL) { - emit_null_surface_state(brw, NULL, surf_offset); - return; - } - - if (intel_obj) - bo = brw_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size, - false); - - if (isl_format == ISL_FORMAT_UNSUPPORTED) { - _mesa_problem(NULL, "bad format %s for texture buffer\n", - _mesa_get_format_name(format)); - } - - brw_emit_buffer_surface_state(brw, surf_offset, bo, - tObj->BufferOffset, - isl_format, - size, - texel_size, - 0); -} - -/** - * Set up a binding table entry for use by stream output logic (transform - * feedback). - * - * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES. - */ -void -brw_update_sol_surface(struct brw_context *brw, - struct gl_buffer_object *buffer_obj, - uint32_t *out_offset, unsigned num_vector_components, - unsigned stride_dwords, unsigned offset_dwords) -{ - struct brw_buffer_object *intel_bo = brw_buffer_object(buffer_obj); - uint32_t offset_bytes = 4 * offset_dwords; - struct brw_bo *bo = brw_bufferobj_buffer(brw, intel_bo, - offset_bytes, - buffer_obj->Size - offset_bytes, - true); - uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset); - uint32_t pitch_minus_1 = 4*stride_dwords - 1; - size_t size_dwords = buffer_obj->Size / 4; - uint32_t buffer_size_minus_1, width, height, depth, surface_format; - - /* FIXME: can we rely on core Mesa to ensure that the buffer isn't - * too big to map using a single binding table entry? - */ - assert((size_dwords - offset_dwords) / stride_dwords - <= BRW_MAX_NUM_BUFFER_ENTRIES); - - if (size_dwords > offset_dwords + num_vector_components) { - /* There is room for at least 1 transform feedback output in the buffer. - * Compute the number of additional transform feedback outputs the - * buffer has room for. - */ - buffer_size_minus_1 = - (size_dwords - offset_dwords - num_vector_components) / stride_dwords; - } else { - /* There isn't even room for a single transform feedback output in the - * buffer. We can't configure the binding table entry to prevent output - * entirely; we'll have to rely on the geometry shader to detect - * overflow. But to minimize the damage in case of a bug, set up the - * binding table entry to just allow a single output. 
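The texel-count clamp in buffer_texture_range_size() above reduces to a three-way minimum in bytes, so that once ISL divides by the texel stride the resulting texel count respects MAX_TEXTURE_BUFFER_SIZE. A standalone sketch with hypothetical parameter names:

    #include <stdio.h>

    #define MIN2(a, b)    ((a) < (b) ? (a) : (b))
    #define MIN3(a, b, c) MIN2(MIN2(a, b), (c))

    /* Clamp a buffer texture's range in bytes so that, after dividing by the
     * texel stride, the texel count respects MAX_TEXTURE_BUFFER_SIZE. */
    static unsigned
    range_size(unsigned tex_buffer_size,   /* glTexBufferRange size, in bytes  */
               unsigned bytes_past_offset, /* buffer size minus buffer offset  */
               unsigned max_texels, unsigned texel_size)
    {
       return MIN3(tex_buffer_size, bytes_past_offset, max_texels * texel_size);
    }

    int main(void)
    {
       /* 1M texels of RGBA32F (16 bytes each) would be allowed, but only
        * 4096 bytes remain in the buffer object: the buffer tail wins. */
       printf("%u\n", range_size(1u << 20, 4096, 1u << 20, 16));
       return 0;
    }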
- */ - buffer_size_minus_1 = 0; - } - width = buffer_size_minus_1 & 0x7f; - height = (buffer_size_minus_1 & 0xfff80) >> 7; - depth = (buffer_size_minus_1 & 0x7f00000) >> 20; - - switch (num_vector_components) { - case 1: - surface_format = ISL_FORMAT_R32_FLOAT; - break; - case 2: - surface_format = ISL_FORMAT_R32G32_FLOAT; - break; - case 3: - surface_format = ISL_FORMAT_R32G32B32_FLOAT; - break; - case 4: - surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; - break; - default: - unreachable("Invalid vector size for transform feedback output"); - } - - surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | - BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | - surface_format << BRW_SURFACE_FORMAT_SHIFT | - BRW_SURFACE_RC_READ_WRITE; - surf[1] = brw_state_reloc(&brw->batch, - *out_offset + 4, bo, offset_bytes, RELOC_WRITE); - surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | - height << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | - pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; - surf[5] = 0; -} - -/* Creates a new WM constant buffer reflecting the current fragment program's - * constants, if needed by the fragment program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. - */ -static void -brw_upload_wm_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->wm.base; - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_program *fp = - (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; - - /* BRW_NEW_FS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_wm_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA, - }, - .emit = brw_upload_wm_pull_constants, -}; - -/** - * Creates a null renderbuffer surface. - * - * This is used when the shader doesn't write to any color output. An FB - * write to target 0 will still be emitted, because that's how the thread is - * terminated (and computed depth is returned), so we need to have the - * hardware discard the target 0 color output. - */ -static void -emit_null_surface_state(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t *out_offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t *surf = brw_state_batch(brw, - brw->isl_dev.ss.size, - brw->isl_dev.ss.align, - out_offset); - - /* Use the fb dimensions or 1x1x1 */ - const unsigned width = fb ? _mesa_geometric_width(fb) : 1; - const unsigned height = fb ? _mesa_geometric_height(fb) : 1; - const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1; - - if (devinfo->ver != 6 || samples <= 1) { - isl_null_fill_state(&brw->isl_dev, surf, - .size = isl_extent3d(width, height, 1)); - return; - } - - /* On Gfx6, null render targets seem to cause GPU hangs when multisampling. - * So work around this problem by rendering into a dummy color buffer. - * - * To decrease the amount of memory needed by the workaround buffer, we - * set its pitch to 128 bytes (the width of a Y tile). This means that - * the amount of memory needed for the workaround buffer is - * (width_in_tiles + height_in_tiles - 1) tiles.
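The width/height/depth split at the top of this hunk packs a 27-bit "entries minus one" count into the 7-, 13-, and 7-bit fields of a SURFTYPE_BUFFER surface. A standalone restatement with a round-trip check:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Split a 27-bit "entries minus one" count across the 7-bit width,
     * 13-bit height, and 7-bit depth fields of a SURFTYPE_BUFFER surface,
     * as brw_update_sol_surface() does. */
    static void
    split_buffer_size(uint32_t size_minus_1,
                      uint32_t *width, uint32_t *height, uint32_t *depth)
    {
       assert(size_minus_1 < (1u << 27));
       *width  = size_minus_1 & 0x7f;              /* bits  6:0  */
       *height = (size_minus_1 & 0xfff80) >> 7;    /* bits 19:7  */
       *depth  = (size_minus_1 & 0x7f00000) >> 20; /* bits 26:20 */
    }

    int main(void)
    {
       uint32_t w, h, d;
       split_buffer_size(0x123456f, &w, &h, &d);
       /* Recombining the fields must give back the original value. */
       assert((d << 20 | h << 7 | w) == 0x123456f);
       printf("w=%u h=%u d=%u\n", w, h, d);
       return 0;
    }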
- * - * Note that since the workaround buffer will be interpreted by the - * hardware as an interleaved multisampled buffer, we need to compute - * width_in_tiles and height_in_tiles by dividing the width and height - * by 16 rather than the normal Y-tile size of 32. - */ - unsigned width_in_tiles = ALIGN(width, 16) / 16; - unsigned height_in_tiles = ALIGN(height, 16) / 16; - unsigned pitch_minus_1 = 127; - unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; - brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo, - size_needed); - - surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); - surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4, - brw->wm.multisampled_null_render_target_bo, - 0, RELOC_WRITE); - - surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | - (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - - /* From Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming - * Notes): - * - * If Surface Type is SURFTYPE_NULL, this field must be TRUE - */ - surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | - pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); - surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4; - surf[5] = 0; -} - -/** - * Sets up a surface state structure to point at the given region. - * While it is only used for the front/back buffer currently, it should be - * usable for further buffers when doing ARB_draw_buffers support. - */ -static uint32_t -gfx4_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - unsigned unit, - uint32_t surf_index) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_mipmap_tree *mt = irb->mt; - uint32_t *surf; - uint32_t tile_x, tile_y; - enum isl_format format; - uint32_t offset; - /* _NEW_BUFFERS */ - mesa_format rb_format = _mesa_get_render_format(ctx, brw_rb_format(irb)); - /* BRW_NEW_FS_PROG_DATA */ - - if (rb->TexImage && !devinfo->has_surface_tile_offset) { - brw_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); - - if (tile_x != 0 || tile_y != 0) { - /* Original gfx4 hardware couldn't draw to a non-tile-aligned - * destination in a miptree unless you actually set up your renderbuffer - * as a miptree and used the fragile lod/array_index/etc. controls to - * select the image. So, instead, we just make a new single-level - * miptree and render into that.
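The workaround-buffer sizing above follows from the 128-byte pitch: the dummy buffer needs at most (width_in_tiles + height_in_tiles - 1) 4 KiB tiles, with the interleaved MSAA layout making each tile cover 16x16 pixels for this purpose. A standalone restatement of that computation:

    #include <stdio.h>

    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    /* Size of the dummy buffer for the Gfx6 multisampled-null-RT workaround:
     * with a 128-byte pitch, the worst case is
     * (width_in_tiles + height_in_tiles - 1) tiles of 4 KiB each, where a
     * "tile" covers 16x16 pixels under the interleaved MSAA layout. */
    static unsigned workaround_bo_size(unsigned width, unsigned height)
    {
       unsigned width_in_tiles  = ALIGN(width, 16) / 16;
       unsigned height_in_tiles = ALIGN(height, 16) / 16;
       return (width_in_tiles + height_in_tiles - 1) * 4096;
    }

    int main(void)
    {
       /* A 1920x1080 framebuffer needs (120 + 68 - 1) tiles, i.e. 748 KiB. */
       printf("%u bytes\n", workaround_bo_size(1920, 1080));
       return 0;
    }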
- */ - brw_renderbuffer_move_to_temp(brw, irb, false); - assert(irb->align_wa_mt); - mt = irb->align_wa_mt; - } - } - - surf = brw_state_batch(brw, 6 * 4, 32, &offset); - - format = brw->mesa_to_isl_render_format[rb_format]; - if (unlikely(!brw->mesa_format_supports_render[rb_format])) { - _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __func__, _mesa_get_format_name(rb_format)); - } - - surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - format << BRW_SURFACE_FORMAT_SHIFT); - - /* reloc */ - assert(mt->offset % mt->cpp == 0); - surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo, - mt->offset + - brw_renderbuffer_get_tile_offsets(irb, - &tile_x, - &tile_y), - RELOC_WRITE); - - surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | - (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - - surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) | - (mt->surf.row_pitch_B - 1) << BRW_SURFACE_PITCH_SHIFT); - - surf[4] = brw_get_surface_num_multisamples(mt->surf.samples); - - assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | - (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | - (mt->surf.image_alignment_el.height == 4 ? - BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); - - if (devinfo->ver < 6) { - /* _NEW_COLOR */ - if (!ctx->Color.ColorLogicOpEnabled && - ctx->Color._AdvancedBlendMode == BLEND_NONE && - (ctx->Color.BlendEnabled & (1 << unit))) - surf[0] |= BRW_SURFACE_BLEND_ENABLED; - - if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 0)) - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT; - if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 1)) - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT; - if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 2)) - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT; - - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0 || - !GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 3)) { - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT; - } - } - - return offset; -} - -static void -update_renderbuffer_surfaces(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS | _NEW_COLOR */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; - - /* Render targets always start at binding table index 0. */ - const unsigned rt_start = 0; - - uint32_t *surf_offsets = brw->wm.base.surf_offset; - - /* Update surfaces for drawing buffers */ - if (fb->_NumColorDrawBuffers >= 1) { - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; - - if (brw_renderbuffer(rb)) { - surf_offsets[rt_start + i] = devinfo->ver >= 6 ? - gfx6_update_renderbuffer_surface(brw, rb, i, rt_start + i) : - gfx4_update_renderbuffer_surface(brw, rb, i, rt_start + i); - } else { - emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]); - } - } - } else { - emit_null_surface_state(brw, fb, &surf_offsets[rt_start]); - } - - /* The PIPE_CONTROL command description says: - * - * "Whenever a Binding Table Index (BTI) used by a Render Taget Message - * points to a different RENDER_SURFACE_STATE, SW must issue a Render - * Target Cache Flush by enabling this bit. 
When render target flush - * is set due to new association of BTI, PS Scoreboard Stall bit must - * be set in this packet." - */ - if (devinfo->ver >= 11) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state brw_renderbuffer_surfaces = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR, - .brw = BRW_NEW_BATCH, - }, - .emit = update_renderbuffer_surfaces, -}; - -const struct brw_tracked_state gfx6_renderbuffer_surfaces = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE, - }, - .emit = update_renderbuffer_surfaces, -}; - -static void -update_renderbuffer_read_surfaces(struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - if (wm_prog_data->has_render_target_reads && - !ctx->Extensions.EXT_shader_framebuffer_fetch) { - /* _NEW_BUFFERS */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; - const struct brw_renderbuffer *irb = brw_renderbuffer(rb); - const unsigned surf_index = - wm_prog_data->binding_table.render_target_read_start + i; - uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index]; - - if (irb) { - const enum isl_format format = brw->mesa_to_isl_render_format[ - _mesa_get_render_format(ctx, brw_rb_format(irb))]; - assert(isl_format_supports_sampling(&brw->screen->devinfo, - format)); - - /* Override the target of the texture if the render buffer is a - * single slice of a 3D texture (since the minimum array element - * field of the surface state structure is ignored by the sampler - * unit for 3D textures on some hardware), or if the render buffer - * is a 1D array (since shaders always provide the array index - * coordinate at the Z component to avoid state-dependent - * recompiles when changing the texture target of the - * framebuffer). - */ - const GLenum target = - (irb->mt->target == GL_TEXTURE_3D && - irb->layer_count == 1) ? GL_TEXTURE_2D : - irb->mt->target == GL_TEXTURE_1D_ARRAY ? 
GL_TEXTURE_2D_ARRAY : - irb->mt->target; - - const struct isl_view view = { - .format = format, - .base_level = irb->mt_level - irb->mt->first_level, - .levels = 1, - .base_array_layer = irb->mt_layer, - .array_len = irb->layer_count, - .swizzle = ISL_SWIZZLE_IDENTITY, - .usage = ISL_SURF_USAGE_TEXTURE_BIT, - }; - - enum isl_aux_usage aux_usage = - brw_miptree_texture_aux_usage(brw, irb->mt, format, - brw->gfx9_astc5x5_wa_tex_mask); - if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE) - aux_usage = ISL_AUX_USAGE_NONE; - - brw_emit_surface_state(brw, irb->mt, target, view, aux_usage, - surf_offset, surf_index, - 0); - - } else { - emit_null_surface_state(brw, fb, surf_offset); - } - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; - } -} - -const struct brw_tracked_state brw_renderbuffer_read_surfaces = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_FS_PROG_DATA, - }, - .emit = update_renderbuffer_read_surfaces, -}; - -static bool -is_depth_texture(struct brw_texture_object *iobj) -{ - GLenum base_format = _mesa_get_format_base_format(iobj->_Format); - return base_format == GL_DEPTH_COMPONENT || - (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling); -} - -static void -update_stage_texture_surfaces(struct brw_context *brw, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - bool for_gather, uint32_t plane) -{ - if (!prog) - return; - - struct gl_context *ctx = &brw->ctx; - - uint32_t *surf_offset = stage_state->surf_offset; - - /* BRW_NEW_*_PROG_DATA */ - if (for_gather) - surf_offset += stage_state->prog_data->binding_table.gather_texture_start; - else - surf_offset += stage_state->prog_data->binding_table.plane_start[plane]; - - unsigned num_samplers = BITSET_LAST_BIT(prog->info.textures_used); - for (unsigned s = 0; s < num_samplers; s++) { - surf_offset[s] = 0; - - if (BITSET_TEST(prog->info.textures_used, s)) { - const unsigned unit = prog->SamplerUnits[s]; - const bool used_by_txf = BITSET_TEST(prog->info.textures_used_by_txf, s); - struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; - struct brw_texture_object *iobj = brw_texture_object(obj); - - /* _NEW_TEXTURE */ - if (!obj) - continue; - - if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) { - /* A programming note for the sample_c message says: - * - * "The Surface Format of the associated surface must be - * indicated as supporting shadow mapping as indicated in the - * surface format table." - * - * Accessing non-depth textures via a sampler*Shadow type is - * undefined. GLSL 4.50 page 162 says: - * - * "If a shadow texture call is made to a sampler that does not - * represent a depth texture, then results are undefined." - * - * We give them a null surface (zeros) for undefined. We've seen - * GPU hangs with color buffers and sample_c, so we try and avoid - * those with this hack. - */ - emit_null_surface_state(brw, NULL, surf_offset + s); - } else { - brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, - used_by_txf, plane); - } - } - } -} - - -/** - * Construct SURFACE_STATE objects for enabled textures. 
- */ -static void -brw_update_texture_surfaces(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX]; - - /* BRW_NEW_TESS_PROGRAMS */ - struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL]; - struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL]; - - /* BRW_NEW_GEOMETRY_PROGRAM */ - struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY]; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT]; - - /* _NEW_TEXTURE */ - update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0); - update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0); - update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0); - update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0); - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0); - - /* Emit alternate set of surface state for gather. This - * allows the surface format to be overridden for only the - * gather4 messages. */ - if (devinfo->ver < 8) { - if (vs && vs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0); - if (tcs && tcs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0); - if (tes && tes->info.uses_texture_gather) - update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0); - if (gs && gs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0); - if (fs && fs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0); - } - - if (fs) { - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1); - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state brw_texture_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_TEXTURE_BUFFER | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_update_texture_surfaces, -}; - -static void -brw_update_cs_texture_surfaces(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE]; - - /* _NEW_TEXTURE */ - update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0); - - /* Emit alternate set of surface state for gather. This - * allows the surface format to be overridden for only the - * gather4 messages.
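The gather pass above lands in a separate binding-table section; update_stage_texture_surfaces() only changes the base offset it writes surface states at. A toy sketch of that slot selection follows; the struct is a hypothetical stand-in for the binding_table fields of brw_stage_prog_data:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical mirror of the binding-table fields consulted above. */
struct toy_binding_table {
   uint32_t plane_start[3];       /* normal sampling, one section per plane */
   uint32_t gather_texture_start; /* alternate section for gather4 on ver < 8 */
};

/* Binding-table slot for sampler `s`: the gather section is used when
 * emitting the alternate surface set, so only gather4 messages see the
 * overridden surface format.
 */
static uint32_t
texture_surface_slot(const struct toy_binding_table *bt,
                     bool for_gather, uint32_t plane, uint32_t s)
{
   uint32_t base = for_gather ? bt->gather_texture_start
                              : bt->plane_start[plane];
   return base + s;
}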
- */ - if (devinfo->ver < 8) { - if (cs && cs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state brw_cs_texture_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_COMPUTE_PROGRAM | - BRW_NEW_AUX_STATE, - }, - .emit = brw_update_cs_texture_surfaces, -}; - -static void -upload_buffer_surface(struct brw_context *brw, - struct gl_buffer_binding *binding, - uint32_t *out_offset, - enum isl_format format, - unsigned reloc_flags) -{ - if (!binding->BufferObject) { - emit_null_surface_state(brw, NULL, out_offset); - } else { - ptrdiff_t size = binding->BufferObject->Size - binding->Offset; - if (!binding->AutomaticSize) - size = MIN2(size, binding->Size); - - if (size == 0) { - emit_null_surface_state(brw, NULL, out_offset); - return; - } - - struct brw_buffer_object *iobj = - brw_buffer_object(binding->BufferObject); - struct brw_bo *bo = - brw_bufferobj_buffer(brw, iobj, binding->Offset, size, - (reloc_flags & RELOC_WRITE) != 0); - - brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset, - format, size, 1, reloc_flags); - } -} - -void -brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data) -{ - struct gl_context *ctx = &brw->ctx; - - if (!prog || (prog->info.num_ubos == 0 && - prog->info.num_ssbos == 0 && - prog->info.num_abos == 0)) - return; - - if (prog->info.num_ubos) { - assert(prog_data->binding_table.ubo_start < BRW_MAX_SURFACES); - uint32_t *ubo_surf_offsets = - &stage_state->surf_offset[prog_data->binding_table.ubo_start]; - - for (int i = 0; i < prog->info.num_ubos; i++) { - struct gl_buffer_binding *binding = - &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding]; - upload_buffer_surface(brw, binding, &ubo_surf_offsets[i], - ISL_FORMAT_R32G32B32A32_FLOAT, 0); - } - } - - if (prog->info.num_ssbos || prog->info.num_abos) { - assert(prog_data->binding_table.ssbo_start < BRW_MAX_SURFACES); - uint32_t *ssbo_surf_offsets = - &stage_state->surf_offset[prog_data->binding_table.ssbo_start]; - uint32_t *abo_surf_offsets = ssbo_surf_offsets + prog->info.num_ssbos; - - for (int i = 0; i < prog->info.num_abos; i++) { - struct gl_buffer_binding *binding = - &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding]; - upload_buffer_surface(brw, binding, &abo_surf_offsets[i], - ISL_FORMAT_RAW, RELOC_WRITE); - } - - for (int i = 0; i < prog->info.num_ssbos; i++) { - struct gl_buffer_binding *binding = - &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding]; - - upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i], - ISL_FORMAT_RAW, RELOC_WRITE); - } - } - - stage_state->push_constants_dirty = true; - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -static void -brw_upload_wm_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_program *prog = ctx->FragmentProgram._Current; - - /* BRW_NEW_FS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data); -} - -const struct brw_tracked_state brw_wm_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_wm_ubo_surfaces, -}; - -static void -brw_upload_cs_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - 
struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - - /* BRW_NEW_CS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data); -} - -const struct brw_tracked_state brw_cs_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_CS_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_cs_ubo_surfaces, -}; - -static void -brw_upload_cs_image_surfaces(struct brw_context *brw) -{ - /* _NEW_PROGRAM */ - const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE]; - - if (cp) { - /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, cp, &brw->cs.base, - brw->cs.base.prog_data); - } -} - -const struct brw_tracked_state brw_cs_image_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE | _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_CS_PROG_DATA | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS - }, - .emit = brw_upload_cs_image_surfaces, -}; - -static uint32_t -get_image_format(struct brw_context *brw, mesa_format format, GLenum access) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - enum isl_format hw_format = brw_isl_format_for_mesa_format(format); - if (access == GL_WRITE_ONLY || access == GL_NONE) { - return hw_format; - } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) { - /* Typed surface reads support a very limited subset of the shader - * image formats. Translate it into the closest format the - * hardware supports. - */ - return isl_lower_storage_image_format(devinfo, hw_format); - } else { - /* The hardware doesn't actually support a typed format that we can use - * so we have to fall back to untyped read/write messages. - */ - return ISL_FORMAT_RAW; - } -} - -static void -update_default_image_param(struct brw_context *brw, - struct gl_image_unit *u, - struct brw_image_param *param) -{ - memset(param, 0, sizeof(*param)); - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. - */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; -} - -static void -update_buffer_image_param(struct brw_context *brw, - struct gl_image_unit *u, - struct brw_image_param *param) -{ - const unsigned size = buffer_texture_range_size(brw, u->TexObj); - update_default_image_param(brw, u, param); - - param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat); - param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); -} - -static void -update_image_surface(struct brw_context *brw, - struct gl_image_unit *u, - GLenum access, - uint32_t *surf_offset, - struct brw_image_param *param) -{ - if (_mesa_is_image_unit_valid(&brw->ctx, u)) { - struct gl_texture_object *obj = u->TexObj; - const unsigned format = get_image_format(brw, u->_ActualFormat, access); - const bool written = (access != GL_READ_ONLY && access != GL_NONE); - - if (obj->Target == GL_TEXTURE_BUFFER) { - const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 : - _mesa_get_format_bytes(u->_ActualFormat)); - const unsigned buffer_size = buffer_texture_range_size(brw, obj); - struct brw_bo *const bo = !obj->BufferObject ? NULL : - brw_bufferobj_buffer(brw, brw_buffer_object(obj->BufferObject), - obj->BufferOffset, buffer_size, written); - - brw_emit_buffer_surface_state( - brw, surf_offset, bo, obj->BufferOffset, - format, buffer_size, texel_size, - written ? 
RELOC_WRITE : 0); - - update_buffer_image_param(brw, u, param); - - } else { - struct brw_texture_object *intel_obj = brw_texture_object(obj); - struct brw_mipmap_tree *mt = intel_obj->mt; - - unsigned base_layer, num_layers; - if (u->Layered) { - if (obj->Target == GL_TEXTURE_3D) { - base_layer = 0; - num_layers = minify(mt->surf.logical_level0_px.depth, u->Level); - } else { - assert(obj->Immutable || obj->Attrib.MinLayer == 0); - base_layer = obj->Attrib.MinLayer; - num_layers = obj->Immutable ? - obj->Attrib.NumLayers : - mt->surf.logical_level0_px.array_len; - } - } else { - base_layer = obj->Attrib.MinLayer + u->_Layer; - num_layers = 1; - } - - struct isl_view view = { - .format = format, - .base_level = obj->Attrib.MinLevel + u->Level, - .levels = 1, - .base_array_layer = base_layer, - .array_len = num_layers, - .swizzle = ISL_SWIZZLE_IDENTITY, - .usage = ISL_SURF_USAGE_STORAGE_BIT, - }; - - if (format == ISL_FORMAT_RAW) { - brw_emit_buffer_surface_state( - brw, surf_offset, mt->bo, mt->offset, - format, mt->bo->size - mt->offset, 1 /* pitch */, - written ? RELOC_WRITE : 0); - - } else { - const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; - assert(!brw_miptree_has_color_unresolved(mt, - view.base_level, 1, - view.base_array_layer, - view.array_len)); - brw_emit_surface_state(brw, mt, mt->target, view, - ISL_AUX_USAGE_NONE, - surf_offset, surf_index, - written ? RELOC_WRITE : 0); - } - - isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view); - } - - } else { - emit_null_surface_state(brw, NULL, surf_offset); - update_default_image_param(brw, u, param); - } -} - -void -brw_upload_image_surfaces(struct brw_context *brw, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data) -{ - assert(prog); - struct gl_context *ctx = &brw->ctx; - - if (prog->info.num_images) { - for (unsigned i = 0; i < prog->info.num_images; i++) { - struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]]; - const unsigned surf_idx = prog_data->binding_table.image_start + i; - - update_image_surface(brw, u, prog->sh.ImageAccess[i], - &stage_state->surf_offset[surf_idx], - &stage_state->image_param[i]); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; - /* This may have changed the image metadata dependent on the context - * image unit state and passed to the program as uniforms, make sure - * that push and pull constants are reuploaded. 
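The layered-vs-non-layered selection above is easy to get wrong, so here is the same decision distilled into a standalone sketch. The struct is a hypothetical stand-in for the handful of gl_image_unit and gl_texture_object fields consulted, and minify() halves a dimension per mip level as in Mesa:

#include <stdbool.h>
#include <stdint.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* minify() as used above: dimension of mip `level`, never below 1. */
static uint32_t
minify(uint32_t dim, uint32_t level)
{
   return MAX2(dim >> level, 1);
}

/* Hypothetical stand-in for the fields read by update_image_surface(). */
struct toy_image_binding {
   bool     layered;         /* gl_image_unit::Layered */
   bool     is_3d;           /* target == GL_TEXTURE_3D */
   bool     immutable;       /* texture view (ARB_texture_view) */
   uint32_t level;           /* gl_image_unit::Level */
   uint32_t layer;           /* gl_image_unit::_Layer */
   uint32_t min_layer;       /* gl_texture_object::Attrib.MinLayer */
   uint32_t num_view_layers; /* gl_texture_object::Attrib.NumLayers */
   uint32_t depth0;          /* level-0 depth for 3D textures */
   uint32_t array_len;       /* miptree array length */
};

static void
image_layer_range(const struct toy_image_binding *b,
                  uint32_t *base_layer, uint32_t *num_layers)
{
   if (b->layered) {
      if (b->is_3d) {
         /* Layered 3D binding: all slices of the bound level. */
         *base_layer = 0;
         *num_layers = minify(b->depth0, b->level);
      } else {
         /* Layered array binding: respect the view's MinLayer/NumLayers. */
         *base_layer = b->min_layer;
         *num_layers = b->immutable ? b->num_view_layers : b->array_len;
      }
   } else {
      /* Single-layer binding. */
      *base_layer = b->min_layer + b->layer;
      *num_layers = 1;
   }
}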
- */ - brw->NewGLState |= _NEW_PROGRAM_CONSTANTS; - } -} - -static void -brw_upload_wm_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT]; - - if (wm) { - /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, wm, &brw->wm.base, - brw->wm.base.prog_data); - } -} - -const struct brw_tracked_state brw_wm_image_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_IMAGE_UNITS - }, - .emit = brw_upload_wm_image_surfaces, -}; - -static void -brw_upload_cs_work_groups_surface(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - /* BRW_NEW_CS_PROG_DATA */ - const struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data(brw->cs.base.prog_data); - - if (prog && cs_prog_data->uses_num_work_groups) { - const unsigned surf_idx = - cs_prog_data->binding_table.work_groups_start; - uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; - struct brw_bo *bo; - uint32_t bo_offset; - - if (brw->compute.num_work_groups_bo == NULL) { - bo = NULL; - brw_upload_data(&brw->upload, - (void *)brw->compute.num_work_groups, - 3 * sizeof(GLuint), - sizeof(GLuint), - &bo, - &bo_offset); - } else { - bo = brw->compute.num_work_groups_bo; - bo_offset = brw->compute.num_work_groups_offset; - } - - brw_emit_buffer_surface_state(brw, surf_offset, - bo, bo_offset, - ISL_FORMAT_RAW, - 3 * sizeof(GLuint), 1, - RELOC_WRITE); - - /* The state buffer now holds a reference to our upload, drop ours. */ - if (bo != brw->compute.num_work_groups_bo) - brw_bo_unreference(bo); - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; - } -} - -const struct brw_tracked_state brw_cs_work_groups_surface = { - .dirty = { - .brw = BRW_NEW_CS_PROG_DATA | - BRW_NEW_CS_WORK_GROUPS - }, - .emit = brw_upload_cs_work_groups_surface, -}; diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c deleted file mode 100644 index aed53d9..0000000 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
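brw_upload_cs_work_groups_surface() just above has two possible sources for the three dwords backing gl_NumWorkGroups: the application's indirect-dispatch buffer, or a fresh CPU-side upload for direct dispatches. A condensed, self-contained sketch of that choice, using toy types; upload_dwords() stands in for brw_upload_data():

#include <stdint.h>
#include <string.h>

struct toy_bo { uint8_t storage[64]; };

static struct toy_bo upload_pool;

/* Toy stand-in for brw_upload_data(): copy the data somewhere the GPU
 * could read it and report where it landed.
 */
static void
upload_dwords(const uint32_t *data, uint32_t bytes,
              struct toy_bo **bo_out, uint32_t *offset_out)
{
   memcpy(upload_pool.storage, data, bytes);
   *bo_out = &upload_pool;
   *offset_out = 0;
}

/* Direct dispatches upload the CPU-side counts; indirect dispatches
 * already have them in a buffer object, which the RAW surface can
 * simply point at.
 */
static void
num_work_groups_source(struct toy_bo *indirect_bo, uint32_t indirect_offset,
                       const uint32_t counts[3],
                       struct toy_bo **bo, uint32_t *offset)
{
   if (indirect_bo == NULL) {
      upload_dwords(counts, 3 * sizeof(uint32_t), bo, offset);
   } else {
      *bo = indirect_bo;
      *offset = indirect_offset;
   }
}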
- */ - -#include - -#include "brw_batch.h" -#include "brw_mipmap_tree.h" -#include "brw_fbo.h" - -#include "brw_context.h" -#include "brw_state.h" - -#include "blorp/blorp_genX_exec.h" - -#if GFX_VER <= 5 -#include "gfx4_blorp_exec.h" -#endif - -#include "brw_blorp.h" - -static void blorp_measure_start(struct blorp_batch *batch, - const struct blorp_params *params) { } - -static void * -blorp_emit_dwords(struct blorp_batch *batch, unsigned n) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - brw_batch_begin(brw, n); - uint32_t *map = brw->batch.map_next; - brw->batch.map_next += n; - brw_batch_advance(brw); - return map; -} - -static uint64_t -blorp_emit_reloc(struct blorp_batch *batch, - void *location, struct blorp_address address, uint32_t delta) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - uint32_t offset; - - if (GFX_VER < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) { - offset = (char *)location - (char *)brw->batch.state.map; - return brw_state_reloc(&brw->batch, offset, - address.buffer, address.offset + delta, - address.reloc_flags); - } - - assert(!brw_ptr_in_state_buffer(&brw->batch, location)); - - offset = (char *)location - (char *)brw->batch.batch.map; - return brw_batch_reloc(&brw->batch, offset, - address.buffer, address.offset + delta, - address.reloc_flags); -} - -static void -blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, - struct blorp_address address, uint32_t delta) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - struct brw_bo *bo = address.buffer; - - uint64_t reloc_val = - brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta, - address.reloc_flags); - - void *reloc_ptr = (void *)brw->batch.state.map + ss_offset; -#if GFX_VER >= 8 - *(uint64_t *)reloc_ptr = reloc_val; -#else - *(uint32_t *)reloc_ptr = reloc_val; -#endif -} - -static uint64_t -blorp_get_surface_address(UNUSED struct blorp_batch *blorp_batch, - UNUSED struct blorp_address address) -{ - /* We'll let blorp_surface_reloc write the address. */ - return 0ull; -} - -#if GFX_VER >= 7 && GFX_VER < 10 -static struct blorp_address -blorp_get_surface_base_address(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - return (struct blorp_address) { - .buffer = brw->batch.state.bo, - .offset = 0, - }; -} -#endif - -static void * -blorp_alloc_dynamic_state(struct blorp_batch *batch, - uint32_t size, - uint32_t alignment, - uint32_t *offset) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return brw_state_batch(brw, size, alignment, offset); -} - -UNUSED static void * -blorp_alloc_general_state(struct blorp_batch *blorp_batch, - uint32_t size, - uint32_t alignment, - uint32_t *offset) -{ - /* Use dynamic state range for general state on i965. 
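blorp_surface_reloc() above writes the relocation value straight into the surface state map, and the store width has to track the generation's address size. A freestanding illustration of that pattern, assuming GFX_VER is a per-generation compile-time macro as in genxml builds (the fallback define below is only so the sketch compiles on its own):

#include <stdint.h>
#include <string.h>

#ifndef GFX_VER
#define GFX_VER 8 /* assumption: compiled once per generation, as genxml does */
#endif

/* Store a relocation value into surface state: Gfx8+ surface states carry
 * 64-bit addresses, older generations 32-bit ones. memcpy avoids making
 * alignment assumptions about the state map.
 */
static void
write_reloc_value(void *state_map, uint32_t ss_offset, uint64_t reloc_val)
{
   void *dst = (char *)state_map + ss_offset;
#if GFX_VER >= 8
   memcpy(dst, &reloc_val, sizeof(uint64_t));
#else
   uint32_t val32 = (uint32_t)reloc_val;
   memcpy(dst, &val32, sizeof(uint32_t));
#endif
}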
*/ - return blorp_alloc_dynamic_state(blorp_batch, size, alignment, offset); -} - -static void -blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, - unsigned state_size, unsigned state_alignment, - uint32_t *bt_offset, uint32_t *surface_offsets, - void **surface_maps) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - uint32_t *bt_map = brw_state_batch(brw, - num_entries * sizeof(uint32_t), 32, - bt_offset); - - for (unsigned i = 0; i < num_entries; i++) { - surface_maps[i] = brw_state_batch(brw, - state_size, state_alignment, - &(surface_offsets)[i]); - bt_map[i] = surface_offsets[i]; - } -} - -static void * -blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, - struct blorp_address *addr) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS: - * - * "The VF cache needs to be invalidated before binding and then using - * Vertex Buffers that overlap with any previously bound Vertex Buffer - * (at a 64B granularity) since the last invalidation. A VF cache - * invalidate is performed by setting the "VF Cache Invalidation Enable" - * bit in PIPE_CONTROL." - * - * This restriction first appears in the Skylake PRM but the internal docs - * also list it as being an issue on Broadwell. In order to avoid this - * problem, we align all vertex buffer allocations to 64 bytes. - */ - uint32_t offset; - void *data = brw_state_batch(brw, size, 64, &offset); - - *addr = (struct blorp_address) { - .buffer = brw->batch.state.bo, - .offset = offset, - - /* The VF cache designers apparently cut corners, and made the cache - * only consider the bottom 32 bits of memory addresses. If you happen - * to have two vertex buffers which get placed exactly 4 GiB apart and - * use them in back-to-back draw calls, you can get collisions. To work - * around this problem, we restrict vertex buffers to the low 32 bits of - * the address space. - */ - .reloc_flags = RELOC_32BIT, - - .mocs = brw_mocs(&brw->isl_dev, brw->batch.state.bo), - }; - - return data; -} - -/** - * See vf_invalidate_for_vb_48b_transitions in genX_state_upload.c. - */ -static void -blorp_vf_invalidate_for_vb_48b_transitions(UNUSED struct blorp_batch *batch, - UNUSED const struct blorp_address *addrs, - UNUSED uint32_t *sizes, - UNUSED unsigned num_vbs) -{ -#if GFX_VER >= 8 && GFX_VER < 11 - struct brw_context *brw = batch->driver_batch; - bool need_invalidate = false; - - for (unsigned i = 0; i < num_vbs; i++) { - struct brw_bo *bo = addrs[i].buffer; - uint16_t high_bits = - bo && (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32u : 0; - - if (high_bits != brw->vb.last_bo_high_bits[i]) { - need_invalidate = true; - brw->vb.last_bo_high_bits[i] = high_bits; - } - } - - if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); - } -#endif -} - -UNUSED static struct blorp_address -blorp_get_workaround_address(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return (struct blorp_address) { - .buffer = brw->workaround_bo, - .offset = brw->workaround_bo_offset, - }; -} - -static void -blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start, - UNUSED size_t size) -{ - /* All allocated states come from the batch which we will flush before we - * submit it. 
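The 48-bit-address workaround above hinges on one observation: the VF cache tags addresses by their low 32 bits only, so two vertex buffers placed exactly 4 GiB apart alias. A minimal sketch of the bookkeeping, with last_high_bits standing in for brw->vb.last_bo_high_bits (the array size here is illustrative):

#include <stdbool.h>
#include <stdint.h>

#define TOY_MAX_VBS 33

static uint16_t last_high_bits[TOY_MAX_VBS];

/* Returns true when any vertex buffer moved to a different 4 GiB bin
 * since the last check, i.e. when a VF cache invalidate is needed
 * because the cache only compares the low 32 address bits.
 */
static bool
vb_bins_changed(const uint64_t *addresses, unsigned num_vbs)
{
   bool need_invalidate = false;
   for (unsigned i = 0; i < num_vbs; i++) {
      uint16_t high_bits = addresses[i] >> 32;
      if (high_bits != last_high_bits[i]) {
         need_invalidate = true;
         last_high_bits[i] = high_bits;
      }
   }
   return need_invalidate;
}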
There's nothing for us to do here. - */ -} - -#if GFX_VER >= 7 -static const struct intel_l3_config * -blorp_get_l3_config(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return brw->l3.config; -} -#else /* GFX_VER < 7 */ -static void -blorp_emit_urb_config(struct blorp_batch *batch, - unsigned vs_entry_size, - UNUSED unsigned sf_entry_size) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - -#if GFX_VER == 6 - gfx6_upload_urb(brw, vs_entry_size, false, 0); -#else - /* We calculate it now and emit later. */ - brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size); -#endif -} -#endif - -void -genX(blorp_exec)(struct blorp_batch *batch, - const struct blorp_params *params) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - struct gl_context *ctx = &brw->ctx; - bool check_aperture_failed_once = false; - -#if GFX_VER >= 11 - /* The PIPE_CONTROL command description says: - * - * "Whenever a Binding Table Index (BTI) used by a Render Taget Message - * points to a different RENDER_SURFACE_STATE, SW must issue a Render - * Target Cache Flush by enabling this bit. When render target flush - * is set due to new association of BTI, PS Scoreboard Stall bit must - * be set in this packet." - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD); -#endif - - /* Flush the sampler and render caches. We definitely need to flush the - * sampler cache so that we get updated contents from the render cache for - * the glBlitFramebuffer() source. Also, we are sometimes warned in the - * docs to flush the cache between reinterpretations of the same surface - * data with different formats, which blorp does for stencil and depth - * data. - */ - if (params->src.enabled) - brw_cache_flush_for_read(brw, params->src.addr.buffer); - if (params->dst.enabled) { - brw_cache_flush_for_render(brw, params->dst.addr.buffer, - params->dst.view.format, - params->dst.aux_usage); - } - if (params->depth.enabled) - brw_cache_flush_for_depth(brw, params->depth.addr.buffer); - if (params->stencil.enabled) - brw_cache_flush_for_depth(brw, params->stencil.addr.buffer); - - brw_select_pipeline(brw, BRW_RENDER_PIPELINE); - brw_emit_l3_state(brw); - -retry: - brw_batch_require_space(brw, 1400); - brw_require_statebuffer_space(brw, 600); - brw_batch_save_state(brw); - check_aperture_failed_once |= brw_batch_saved_state_is_empty(brw); - brw->batch.no_wrap = true; - -#if GFX_VER == 6 - /* Emit workaround flushes when we switch from drawing to blorping. */ - brw_emit_post_sync_nonzero_flush(brw); -#endif - - brw->vtbl.emit_state_base_address(brw); - -#if GFX_VER >= 8 - gfx7_l3_state.emit(brw); -#endif - -#if GFX_VER >= 6 - brw_emit_depth_stall_flushes(brw); -#endif - -#if GFX_VER == 8 - gfx8_write_pma_stall_bits(brw, 0); -#endif - - const unsigned scale = params->fast_clear_op ? 
UINT_MAX : 1; - if (brw->current_hash_scale != scale) { - brw_emit_hashing_mode(brw, params->x1 - params->x0, - params->y1 - params->y0, scale); - } - - blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { - rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; - rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; - } - - blorp_exec(batch, params); - - brw->batch.no_wrap = false; - - /* Check if the blorp op we just did would make our batch likely to fail to - * map all the BOs into the GPU at batch exec time later. If so, flush the - * batch and try again with nothing else in the batch. - */ - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!check_aperture_failed_once) { - check_aperture_failed_once = true; - brw_batch_reset_to_saved(brw); - brw_batch_flush(brw); - goto retry; - } else { - int ret = brw_batch_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: blorp emit exceeded available aperture space\n"); - } - } - - if (unlikely(brw->always_flush_batch)) - brw_batch_flush(brw); - - /* We've smashed all state compared to what the normal 3D pipeline - * rendering tracks for GL. - */ - brw->ctx.NewDriverState |= BRW_NEW_BLORP; - brw->no_depth_or_stencil = !params->depth.enabled && - !params->stencil.enabled; - brw->ib.index_size = -1; - brw->urb.vsize = 0; - brw->urb.gs_present = false; - brw->urb.gsize = 0; - brw->urb.tess_present = false; - brw->urb.hsize = 0; - brw->urb.dsize = 0; - - if (params->dst.enabled) { - brw_render_cache_add_bo(brw, params->dst.addr.buffer, - params->dst.view.format, - params->dst.aux_usage); - } - if (params->depth.enabled) - brw_depth_cache_add_bo(brw, params->depth.addr.buffer); - if (params->stencil.enabled) - brw_depth_cache_add_bo(brw, params->stencil.addr.buffer); -} diff --git a/src/mesa/drivers/dri/i965/genX_boilerplate.h b/src/mesa/drivers/dri/i965/genX_boilerplate.h deleted file mode 100644 index 20df05d..0000000 --- a/src/mesa/drivers/dri/i965/genX_boilerplate.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef GENX_BOILERPLATE_H -#define GENX_BOILERPLATE_H - -#include - -#include "genxml/gen_macros.h" - -#include "brw_context.h" -#include "brw_batch.h" - -UNUSED static void * -emit_dwords(struct brw_context *brw, unsigned n) -{ - brw_batch_begin(brw, n); - uint32_t *map = brw->batch.map_next; - brw->batch.map_next += n; - brw_batch_advance(brw); - return map; -} - -struct brw_address { - struct brw_bo *bo; - unsigned reloc_flags; - uint32_t offset; -}; - -#define __gen_address_type struct brw_address -#define __gen_user_data struct brw_context - -static uint64_t -__gen_combine_address(struct brw_context *brw, void *location, - struct brw_address address, uint32_t delta) -{ - struct brw_batch *batch = &brw->batch; - uint32_t offset; - - if (address.bo == NULL) { - return address.offset + delta; - } else { - if (GFX_VER < 6 && brw_ptr_in_state_buffer(batch, location)) { - offset = (char *) location - (char *) brw->batch.state.map; - return brw_state_reloc(batch, offset, address.bo, - address.offset + delta, - address.reloc_flags); - } - - assert(!brw_ptr_in_state_buffer(batch, location)); - - offset = (char *) location - (char *) brw->batch.batch.map; - return brw_batch_reloc(batch, offset, address.bo, - address.offset + delta, - address.reloc_flags); - } -} - -UNUSED static struct brw_address -rw_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_WRITE, - }; -} - -UNUSED static struct brw_address -ro_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - }; -} - -UNUSED static struct brw_address -rw_32_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_WRITE | RELOC_32BIT, - }; -} - -UNUSED static struct brw_address -ro_32_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_32BIT, - }; -} - -UNUSED static struct brw_address -ggtt_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT, - }; -} - -#include "genxml/genX_pack.h" - -#define _brw_cmd_length(cmd) cmd ## _length -#define _brw_cmd_length_bias(cmd) cmd ## _length_bias -#define _brw_cmd_header(cmd) cmd ## _header -#define _brw_cmd_pack(cmd) cmd ## _pack - -#define brw_batch_emit(brw, cmd, name) \ - for (struct cmd name = { _brw_cmd_header(cmd) }, \ - *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \ - __builtin_expect(_dst != NULL, 1); \ - _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ - _dst = NULL) - -#define brw_batch_emitn(brw, cmd, n, ...) 
({ \ - uint32_t *_dw = emit_dwords(brw, n); \ - struct cmd template = { \ - _brw_cmd_header(cmd), \ - .DWordLength = n - _brw_cmd_length_bias(cmd), \ - __VA_ARGS__ \ - }; \ - _brw_cmd_pack(cmd)(brw, _dw, &template); \ - _dw + 1; /* Array starts at dw[1] */ \ - }) - -#define brw_state_emit(brw, cmd, align, offset, name) \ - for (struct cmd name = {}, \ - *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \ - align, offset); \ - __builtin_expect(_dst != NULL, 1); \ - _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ - _dst = NULL) - -#endif diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c deleted file mode 100644 index 880b7c7..0000000 --- a/src/mesa/drivers/dri/i965/genX_pipe_control.c +++ /dev/null @@ -1,514 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "genX_boilerplate.h" -#include "brw_defines.h" -#include "brw_state.h" - -static unsigned -flags_to_post_sync_op(uint32_t flags) -{ - if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) - return WriteImmediateData; - - if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) - return WritePSDepthCount; - - if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) - return WriteTimestamp; - - return 0; -} - -/** - * Do the given flags have a Post Sync or LRI Post Sync operation? - */ -static enum pipe_control_flags -get_post_sync_flags(enum pipe_control_flags flags) -{ - flags &= PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP | - PIPE_CONTROL_LRI_POST_SYNC_OP; - - /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with - * "LRI Post Sync Operation". So more than one bit set would be illegal. - */ - assert(util_bitcount(flags) <= 1); - - return flags; -} - -#define IS_COMPUTE_PIPELINE(brw) \ - (GFX_VER >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE) - -/* Closed interval - GFX_VER \in [x, y] */ -#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= x && GFX_VER <= y) -#define IS_GFX_VERx10_BETWEEN(x, y) \ - (GFX_VERx10 >= x && GFX_VERx10 <= y) - -/** - * Emit a series of PIPE_CONTROL commands, taking into account any - * workarounds necessary to actually accomplish the caller's request. - * - * Unless otherwise noted, spec quotations in this function come from: - * - * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming - * Restrictions for PIPE_CONTROL. - * - * You should not use this function directly. 
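The pack-macro pattern defined in genX_boilerplate.h above is a for loop that runs its body exactly once over a stack-allocated struct, then packs the struct into the batch as the loop exits. A self-contained miniature of the same trick, with toy names in place of the genxml types and pack functions:

#include <stdio.h>

struct toy_cmd { unsigned opcode, field; };

static void
toy_pack(const struct toy_cmd *cmd)
{
   printf("packing opcode %u, field %u\n", cmd->opcode, cmd->field);
}

/* Miniature of brw_batch_emit(): declare the command with its defaults,
 * run the body once so the caller can set fields, pack on the way out.
 */
#define toy_emit(name)                                          \
   for (struct toy_cmd name = { .opcode = 42 }, *_once = &name; \
        _once != NULL;                                          \
        toy_pack(&name), _once = NULL)

int main(void)
{
   toy_emit(cmd) {
      cmd.field = 7; /* body runs exactly once; defaults come from the initializer */
   }
   return 0;
}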
Use the helpers in - * brw_pipe_control.c instead, which may split the pipe control further. - */ -void -genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, uint64_t imm) -{ - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); - enum pipe_control_flags non_lri_post_sync_flags = - post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; - - /* Recursive PIPE_CONTROL workarounds -------------------------------- - * (http://knowyourmeme.com/memes/xzibit-yo-dawg) - * - * We do these first because we want to look at the original operation, - * rather than any workarounds we set. - */ - if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { - /* Hardware workaround: SNB B-Spec says: - * - * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush - * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is - * required." - */ - brw_emit_post_sync_nonzero_flush(brw); - } - - if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { - /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description - * lists several workarounds: - * - * "Project: SKL, KBL, BXT - * - * If the VF Cache Invalidation Enable is set to a 1 in a - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields - * sets to 0, with the VF Cache Invalidation Enable set to 0 - * needs to be sent prior to the PIPE_CONTROL with VF Cache - * Invalidation Enable set to a 1." - */ - genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0); - } - - if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) { - /* Project: SKL / Argument: LRI Post Sync Operation [23] - * - * "PIPECONTROL command with “Command Streamer Stall Enable” must be - * programmed prior to programming a PIPECONTROL command with "LRI - * Post Sync Operation" in GPGPU mode of operation (i.e when - * PIPELINE_SELECT command is set to GPGPU mode of operation)." - * - * The same text exists a few rows below for Post Sync Op. - */ - genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0); - } - - /* "Flush Types" workarounds --------------------------------------------- - * We do these now because they may add post-sync operations or CS stalls. - */ - - if (IS_GFX_VER_BETWEEN(8, 10) && - (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { - /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate - * - * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or - * 'Write PS Depth Count' or 'Write Timestamp'." - */ - if (!bo) { - flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - bo = brw->workaround_bo; - offset = brw->workaround_bo_offset; - } - } - - if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) { - /* Project: PRE-HSW / Argument: Depth Stall - * - * "The following bits must be clear: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1)" - */ - assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH))); - } - - if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) { - /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable): - * - * "This bit must be DISABLED for operations other than writing - * PS_DEPTH_COUNT." - * - * This seems like nonsense. An Ivybridge workaround requires us to - * emit a PIPE_CONTROL with a depth stall and write immediate post-sync - * operation. 
Gfx8+ requires us to emit depth stalls and depth cache - * flushes together. So, it's hard to imagine this means anything other - * than "we originally intended this to be used for PS_DEPTH_COUNT". - * - * We ignore the supposed restriction and do nothing. - */ - } - - if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) { - /* Project: PRE-HSW / Argument: Depth Cache Flush - * - * "Depth Stall must be clear ([13] of DW1)." - */ - assert(!(flags & PIPE_CONTROL_DEPTH_STALL)); - } - - if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD)) { - /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: - * - * "This bit must be DISABLED for End-of-pipe (Read) fences, - * PS_DEPTH_COUNT or TIMESTAMP queries." - * - * TODO: Implement end-of-pipe checking. - */ - assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP))); - } - - if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) { - /* From the PIPE_CONTROL instruction table, bit 1: - * - * "This bit is ignored if Depth Stall Enable is set. - * Further, the render cache is not flushed even if Write Cache - * Flush Enable bit is set." - * - * We assert that the caller doesn't do this combination, to try and - * prevent mistakes. It shouldn't hurt the GPU, though. - * - * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard" - * and "Render Target Flush" combo is explicitly required for BTI - * update workarounds. - */ - assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_RENDER_TARGET_FLUSH))); - } - - /* PIPE_CONTROL page workarounds ------------------------------------- */ - - if (IS_GFX_VER_BETWEEN(7, 8) && - (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { - /* From the PIPE_CONTROL page itself: - * - * "IVB, HSW, BDW - * Restriction: Pipe_control with CS-stall bit set must be issued - * before a pipe-control command that has the State Cache - * Invalidate bit set." - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (GFX_VERx10 == 75) { - /* From the PIPE_CONTROL page itself: - * - * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation: - * Prior to programming a PIPECONTROL command with any of the RO - * cache invalidation bit set, program a PIPECONTROL flush command - * with “CS stall” bit and “HDC Flush” bit set." - * - * TODO: Actually implement this. What's an HDC Flush? - */ - } - - if (flags & PIPE_CONTROL_FLUSH_LLC) { - /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): - * - * "Project: ALL - * SW must always program Post-Sync Operation to "Write Immediate - * Data" when Flush LLC is set." - * - * For now, we just require the caller to do it. - */ - assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); - } - - /* "Post-Sync Operation" workarounds -------------------------------- */ - - /* Project: All / Argument: Global Snapshot Count Reset [19] - * - * "This bit must not be exercised on any product. - * Requires stall bit ([20] of DW1) set." - * - * We don't use this, so we just assert that it isn't used. The - * PIPE_CONTROL instruction page indicates that they intended this - * as a debug feature and don't think it is useful in production, - * but it may actually be usable, should we ever want to. 
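Several of the fixups in this function follow the same shape as the Gfx8-10 VF-invalidate rule earlier: if a bit demands a real post-sync write and the caller didn't supply one, retarget the packet at the scratch workaround BO. Distilled into a sketch with illustrative flag values and a toy BO type:

#include <stdint.h>

#define TOY_PC_VF_CACHE_INVALIDATE (1u << 0)
#define TOY_PC_WRITE_IMMEDIATE     (1u << 1)

struct toy_bo;

/* Condensed from the workaround above: a VF cache invalidate must carry
 * a post-sync write, so point a Write Immediate at a scratch BO when the
 * caller didn't request any write of their own.
 */
static uint32_t
ensure_vf_invalidate_post_sync(uint32_t flags,
                               struct toy_bo **bo, uint32_t *offset,
                               struct toy_bo *workaround_bo,
                               uint32_t workaround_offset)
{
   if ((flags & TOY_PC_VF_CACHE_INVALIDATE) && *bo == NULL) {
      flags |= TOY_PC_WRITE_IMMEDIATE;
      *bo = workaround_bo;
      *offset = workaround_offset;
   }
   return flags;
}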
- */ - assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); - - if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { - /* Project: All / Arguments: - * - * - Generic Media State Clear [16] - * - Indirect State Pointers Disable [16] - * - * "Requires stall bit ([20] of DW1) set." - * - * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media - * State Clear) says: - * - * "PIPECONTROL command with “Command Streamer Stall Enable” must be - * programmed prior to programming a PIPECONTROL command with "Media - * State Clear" set in GPGPU mode of operation" - * - * This is a subset of the earlier rule, so there's nothing to do. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { - /* Project: All / Argument: Store Data Index - * - * "Post-Sync Operation ([15:14] of DW1) must be set to something other - * than '0'." - * - * For now, we just assert that the caller does this. We might want to - * automatically add a write to the workaround BO... - */ - assert(non_lri_post_sync_flags != 0); - } - - if (flags & PIPE_CONTROL_SYNC_GFDT) { - /* Project: All / Argument: Sync GFDT - * - * "Post-Sync Operation ([15:14] of DW1) must be set to something other - * than '0' or 0x2520[13] must be set." - * - * For now, we just assert that the caller does this. - */ - assert(non_lri_post_sync_flags != 0); - } - - if (IS_GFX_VERx10_BETWEEN(60, 75) && - (flags & PIPE_CONTROL_TLB_INVALIDATE)) { - /* Project: SNB, IVB, HSW / Argument: TLB inv - * - * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1) - * must be set to something other than '0'." - * - * For now, we just assert that the caller does this. - */ - assert(non_lri_post_sync_flags != 0); - } - - if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { - /* Project: IVB+ / Argument: TLB inv - * - * "Requires stall bit ([20] of DW1) set." - * - * Also, from the PIPE_CONTROL instruction table: - * - * "Project: SKL+ - * Post Sync Operation or CS stall must be set to ensure a TLB - * invalidation occurs. Otherwise no cycle will occur to the TLB - * cache to invalidate." - * - * This is not a subset of the earlier rule, so there's nothing to do. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (GFX_VER == 9 && devinfo->gt == 4) { - /* TODO: The big Skylake GT4 post sync op workaround */ - } - - /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */ - - if (IS_COMPUTE_PIPELINE(brw)) { - if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) { - /* Project: SKL+ / Argument: Tex Invalidate - * "Requires stall bit ([20] of DW) set for all GPGPU Workloads." - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (GFX_VER == 8 && (post_sync_flags || - (flags & (PIPE_CONTROL_NOTIFY_ENABLE | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DATA_CACHE_FLUSH)))) { - /* Project: BDW / Arguments: - * - * - LRI Post Sync Operation [23] - * - Post Sync Op [15:14] - * - Notify En [8] - * - Depth Stall [13] - * - Render Target Cache Flush [12] - * - Depth Cache Flush [0] - * - DC Flush Enable [5] - * - * "Requires stall bit ([20] of DW) set for all GPGPU and Media - * Workloads." - * - * (The docs have separate table rows for each bit, with essentially - * the same workaround text. We've combined them here.) 
- */ - flags |= PIPE_CONTROL_CS_STALL; - - /* Also, from the PIPE_CONTROL instruction table, bit 20: - * - * "Project: BDW - * This bit must be always set when PIPE_CONTROL command is - * programmed by GPGPU and MEDIA workloads, except for the cases - * when only Read Only Cache Invalidation bits are set (State - * Cache Invalidation Enable, Instruction cache Invalidation - * Enable, Texture Cache Invalidation Enable, Constant Cache - * Invalidation Enable). This is to WA FFDOP CG issue, this WA - * need not implemented when FF_DOP_CG is disable via "Fixed - * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register." - * - * It sounds like we could avoid CS stalls in some cases, but we - * don't currently bother. This list isn't exactly the list above, - * either... - */ - } - } - - /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: - * - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." - * - * Note that the kernel does CS stalls between batches, so we only need - * to count them within a batch. We currently naively count every 4, and - * don't skip the ones with only read-cache-invalidate bits set. This - * may or may not be a problem... - */ - if (GFX_VERx10 == 70) { - if (flags & PIPE_CONTROL_CS_STALL) { - /* If we're doing a CS stall, reset the counter and carry on. */ - brw->pipe_controls_since_last_cs_stall = 0; - } - - /* If this is the fourth pipe control without a CS stall, do one now. */ - if (++brw->pipe_controls_since_last_cs_stall == 4) { - brw->pipe_controls_since_last_cs_stall = 0; - flags |= PIPE_CONTROL_CS_STALL; - } - } - - /* "Stall" workarounds ---------------------------------------------- - * These have to come after the earlier ones because we may have added - * some additional CS stalls above. - */ - - if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) { - /* Project: PRE-SKL, VLV, CHV - * - * "[All Stepping][All SKUs]: - * - * One of the following must also be set: - * - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - DC Flush Enable ([5] of DW1)" - * - * If we don't already have one of those bits set, we choose to add - * "Stall at Pixel Scoreboard". Some of the other bits require a - * CS stall as a workaround (see above), which would send us into - * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" - * appears to be safe, so we choose that. 
- */ - const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP | - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DATA_CACHE_FLUSH; - if (!(flags & wa_bits)) - flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; - } - - /* Emit --------------------------------------------------------------- */ - - brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) { - #if GFX_VER >= 9 - pc.FlushLLC = 0; - #endif - #if GFX_VER >= 7 - pc.LRIPostSyncOperation = NoLRIOperation; - pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; - pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; - #endif - #if GFX_VER >= 6 - pc.StoreDataIndex = 0; - pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; - pc.GlobalSnapshotCountReset = - flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; - pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; - pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; - pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; - pc.RenderTargetCacheFlushEnable = - flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; - pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; - pc.StateCacheInvalidationEnable = - flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; - pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; - pc.ConstantCacheInvalidationEnable = - flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; - #else - pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; - #endif - pc.PostSyncOperation = flags_to_post_sync_op(flags); - pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; - pc.InstructionCacheInvalidateEnable = - flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; - pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; - #if GFX_VERx10 >= 45 - pc.IndirectStatePointersDisable = - flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; - #endif - #if GFX_VER >= 6 - pc.TextureCacheInvalidationEnable = - flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - #elif GFX_VER == 5 || GFX_VERx10 == 45 - pc.TextureCacheFlushEnable = - flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - #endif - pc.Address = ggtt_bo(bo, offset); - if (GFX_VER < 7 && bo) - pc.DestinationAddressType = DAT_GGTT; - pc.ImmediateData = imm; - } -} diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c deleted file mode 100644 index 3db621b..0000000 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ /dev/null @@ -1,6088 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "main/samplerobj.h"
-
-#include "dev/intel_device_info.h"
-#include "common/intel_sample_positions.h"
-#include "genxml/gen_macros.h"
-#include "common/intel_guardband.h"
-
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/enums.h"
-#include "main/macros.h"
-#include "main/state.h"
-
-#include "genX_boilerplate.h"
-
-#include "brw_context.h"
-#include "brw_cs.h"
-#include "brw_draw.h"
-#include "brw_multisample_state.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_util.h"
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_fbo.h"
-
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/glformats.h"
-#include "main/shaderapi.h"
-#include "main/stencil.h"
-#include "main/transformfeedback.h"
-#include "main/varray.h"
-#include "main/viewport.h"
-#include "util/half_float.h"
-
-#if GFX_VER == 4
-static struct brw_address
-KSP(struct brw_context *brw, uint32_t offset)
-{
-   return ro_bo(brw->cache.bo, offset);
-}
-#else
-static uint32_t
-KSP(UNUSED struct brw_context *brw, uint32_t offset)
-{
-   return offset;
-}
-#endif
-
-#if GFX_VER >= 7
-static void
-emit_lrm(struct brw_context *brw, uint32_t reg, struct brw_address addr)
-{
-   brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_MEM), lrm) {
-      lrm.RegisterAddress = reg;
-      lrm.MemoryAddress = addr;
-   }
-}
-#endif
-
-#if GFX_VER == 7
-static void
-emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm)
-{
-   brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_IMM), lri) {
-      lri.RegisterOffset = reg;
-      lri.DataDWord = imm;
-   }
-}
-#endif
-
-/**
- * Define the base addresses which some state is referenced from.
- *
- * This allows us to avoid having to emit relocations for the objects,
- * and is actually required for binding table pointers on Gfx6.
- *
- * Surface state base address covers binding table pointers and surface state
- * objects, but not the surfaces that the surface state objects point to.
- */
-static void
-genX(emit_state_base_address)(struct brw_context *brw)
-{
-   if (brw->batch.state_base_address_emitted)
-      return;
-
-   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
-    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
-    * programmed prior to STATE_BASE_ADDRESS.
-    *
-    * However, given that the instruction SBA (general state base
-    * address) on this chipset is always set to 0 across X and GL,
-    * maybe this isn't required for us in particular.
-    */
-
-   UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
-
-   /* Flush before updating STATE_BASE_ADDRESS */
-#if GFX_VER >= 6
-   const unsigned dc_flush =
-      GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
-
-   /* Emit a render target cache flush.
-    *
-    * This isn't documented anywhere in the PRM. However, it seems to be
-    * necessary prior to changing the surface state base address. We've
-    * seen issues in Vulkan where we get GPU hangs when using multi-level
-    * command buffers which clear depth, reset state base address, and then
-    * go render stuff.
-    *
-    * Normally, in GL, we would trust the kernel to do sufficient stalls
-    * and flushes prior to executing our batch.
However, it doesn't seem - * as if the kernel's flushing is always sufficient and we don't want to - * rely on it. - * - * We make this an end-of-pipe sync instead of a normal flush because we - * do not know the current status of the GPU. On Haswell at least, - * having a fast-clear operation in flight at the same time as a normal - * rendering operation can cause hangs. Since the kernel's flushing is - * insufficient, we need to ensure that any rendering operations from - * other processes are definitely complete before we try to do our own - * rendering. It's a bit of a big hammer but it appears to work. - */ - brw_emit_end_of_pipe_sync(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - dc_flush); -#endif - - brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) { - /* Set base addresses */ - sba.GeneralStateBaseAddressModifyEnable = true; - -#if GFX_VER >= 6 - sba.DynamicStateBaseAddressModifyEnable = true; - sba.DynamicStateBaseAddress = ro_bo(brw->batch.state.bo, 0); -#endif - - sba.SurfaceStateBaseAddressModifyEnable = true; - sba.SurfaceStateBaseAddress = ro_bo(brw->batch.state.bo, 0); - - sba.IndirectObjectBaseAddressModifyEnable = true; - -#if GFX_VER >= 5 - sba.InstructionBaseAddressModifyEnable = true; - sba.InstructionBaseAddress = ro_bo(brw->cache.bo, 0); -#endif - - /* Set buffer sizes on Gfx8+ or upper bounds on Gfx4-7 */ -#if GFX_VER >= 8 - sba.GeneralStateBufferSize = 0xfffff; - sba.IndirectObjectBufferSize = 0xfffff; - sba.InstructionBufferSize = 0xfffff; - sba.DynamicStateBufferSize = MAX_STATE_SIZE; - - sba.GeneralStateBufferSizeModifyEnable = true; - sba.DynamicStateBufferSizeModifyEnable = true; - sba.IndirectObjectBufferSizeModifyEnable = true; - sba.InstructionBuffersizeModifyEnable = true; -#else - sba.GeneralStateAccessUpperBoundModifyEnable = true; - sba.IndirectObjectAccessUpperBoundModifyEnable = true; - -#if GFX_VER >= 5 - sba.InstructionAccessUpperBoundModifyEnable = true; -#endif - -#if GFX_VER >= 6 - /* Dynamic state upper bound. Although the documentation says that - * programming it to zero will cause it to be ignored, that is a lie. - * If this isn't programmed to a real bound, the sampler border color - * pointer is rejected, causing border color to mysteriously fail. - */ - sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000); - sba.DynamicStateAccessUpperBoundModifyEnable = true; -#else - /* Same idea but using General State Base Address on Gfx4-5 */ - sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000); -#endif -#endif - -#if GFX_VER >= 6 - /* The hardware appears to pay attention to the MOCS fields even - * if you don't set the "Address Modify Enable" bit for the base. - */ - sba.GeneralStateMOCS = mocs; - sba.StatelessDataPortAccessMOCS = mocs; - sba.DynamicStateMOCS = mocs; - sba.IndirectObjectMOCS = mocs; - sba.InstructionMOCS = mocs; - sba.SurfaceStateMOCS = mocs; -#endif -#if GFX_VER >= 9 - sba.BindlessSurfaceStateMOCS = mocs; -#endif -#if GFX_VER >= 11 - sba.BindlessSamplerStateMOCS = mocs; -#endif - } - - /* Flush after updating STATE_BASE_ADDRESS */ -#if GFX_VER >= 6 - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); -#endif - - /* According to section 3.6.1 of VOL1 of the 965 PRM, - * STATE_BASE_ADDRESS updates require a reissue of: - * - * 3DSTATE_PIPELINE_POINTERS - * 3DSTATE_BINDING_TABLE_POINTERS - * MEDIA_STATE_POINTERS - * - * and this continues through Ironlake. 
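The flush/program/invalidate bracketing this function performs around STATE_BASE_ADDRESS can be condensed as follows — a sketch only, reusing the brw_* helpers that appear elsewhere in this patch; the wrapper name is hypothetical:

```c
static void
emit_sba_bracketed(struct brw_context *brw)
{
   /* 1. End-of-pipe sync: make sure no in-flight rendering still uses
    *    the old base addresses (the kernel's flushing alone is not
    *    trusted; Gfx7+ adds a DC flush on top of this). */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH);

   /* 2. Program the new bases: the STATE_BASE_ADDRESS packet above. */

   /* 3. Invalidate every cache whose entries are keyed off those bases. */
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                    PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
```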
The Sandy Bridge PRM, vol
- * 1 part 1 says that the following packets must be reissued:
- *
- * 3DSTATE_CC_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * 3DSTATE_SAMPLER_STATE_POINTERS
- * 3DSTATE_VIEWPORT_STATE_POINTERS
- * MEDIA_STATE_POINTERS
- *
- * Those are always reissued following SBA updates anyway (new
- * batch time), except in the case of the program cache BO
- * changing. Having a separate state flag makes the sequence more
- * obvious.
- */
-   brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
-   brw->batch.state_base_address_emitted = true;
-}
-
-/**
- * Polygon stipple packet
- */
-static void
-genX(upload_polygon_stipple)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_POLYGON */
-   if (!ctx->Polygon.StippleFlag)
-      return;
-
-   brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
-      /* Polygon stipple is provided in OpenGL order, i.e. bottom
-       * row first. If we're rendering to a window (i.e. the
-       * default frame buffer object, 0), then we need to invert
-       * it to match our pixel layout. But if we're rendering
-       * to a FBO (i.e. any named frame buffer object), we *don't*
-       * need to invert - we already match the layout.
-       */
-      if (ctx->DrawBuffer->FlipY) {
-         for (unsigned i = 0; i < 32; i++)
-            poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */
-      } else {
-         for (unsigned i = 0; i < 32; i++)
-            poly.PatternRow[i] = ctx->PolygonStipple[i];
-      }
-   }
-}
-
-static const struct brw_tracked_state genX(polygon_stipple) = {
-   .dirty = {
-      .mesa = _NEW_POLYGON |
-              _NEW_POLYGONSTIPPLE,
-      .brw = BRW_NEW_CONTEXT,
-   },
-   .emit = genX(upload_polygon_stipple),
-};
-
-/**
- * Polygon stipple offset packet
- */
-static void
-genX(upload_polygon_stipple_offset)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_POLYGON */
-   if (!ctx->Polygon.StippleFlag)
-      return;
-
-   brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_OFFSET), poly) {
-      /* _NEW_BUFFERS
-       *
-       * If we're drawing to a system window we have to invert the Y axis
-       * in order to match the OpenGL pixel coordinate system, and our
-       * offset must be matched to the window position. If we're drawing
-       * to a user-created FBO then our native pixel coordinate system
-       * works just fine, and there's no window system to worry about.
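A worked example of the Y-offset formula the deleted code applies just below, for an assumed 1080-pixel-tall window:

```c
#include <stdio.h>

int main(void)
{
   const unsigned height = 1080;                     /* assumed window height */
   const unsigned yoff = (32 - (height & 31)) & 31;  /* 1080 & 31 = 24 */

   printf("stipple y offset = %u\n", yoff);          /* prints 8 */
   return 0;
}
```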
- */ - if (ctx->DrawBuffer->FlipY) { - poly.PolygonStippleYOffset = - (32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31; - } - } -} - -static const struct brw_tracked_state genX(polygon_stipple_offset) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_POLYGON, - .brw = BRW_NEW_CONTEXT, - }, - .emit = genX(upload_polygon_stipple_offset), -}; - -/** - * Line stipple packet - */ -static void -genX(upload_line_stipple)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - if (!ctx->Line.StippleFlag) - return; - - brw_batch_emit(brw, GENX(3DSTATE_LINE_STIPPLE), line) { - line.LineStipplePattern = ctx->Line.StipplePattern; - - line.LineStippleInverseRepeatCount = 1.0f / ctx->Line.StippleFactor; - line.LineStippleRepeatCount = ctx->Line.StippleFactor; - } -} - -static const struct brw_tracked_state genX(line_stipple) = { - .dirty = { - .mesa = _NEW_LINE, - .brw = BRW_NEW_CONTEXT, - }, - .emit = genX(upload_line_stipple), -}; - -/* Constant single cliprect for framebuffer object or DRI2 drawing */ -static void -genX(upload_drawing_rect)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const struct gl_framebuffer *fb = ctx->DrawBuffer; - const unsigned int fb_width = _mesa_geometric_width(fb); - const unsigned int fb_height = _mesa_geometric_height(fb); - - brw_batch_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { - rect.ClippedDrawingRectangleXMax = fb_width - 1; - rect.ClippedDrawingRectangleYMax = fb_height - 1; - } -} - -static const struct brw_tracked_state genX(drawing_rect) = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT, - }, - .emit = genX(upload_drawing_rect), -}; - -static uint32_t * -genX(emit_vertex_buffer_state)(struct brw_context *brw, - uint32_t *dw, - unsigned buffer_nr, - struct brw_bo *bo, - unsigned start_offset, - UNUSED unsigned end_offset, - unsigned stride, - UNUSED unsigned step_rate) -{ - struct GENX(VERTEX_BUFFER_STATE) buf_state = { - .VertexBufferIndex = buffer_nr, - .BufferPitch = stride, - - /* The VF cache designers apparently cut corners, and made the cache - * only consider the bottom 32 bits of memory addresses. If you happen - * to have two vertex buffers which get placed exactly 4 GiB apart and - * use them in back-to-back draw calls, you can get collisions. To work - * around this problem, we restrict vertex buffers to the low 32 bits of - * the address space. - */ - .BufferStartingAddress = ro_32_bo(bo, start_offset), -#if GFX_VER >= 8 - .BufferSize = end_offset - start_offset, -#endif - -#if GFX_VER >= 7 - .AddressModifyEnable = true, -#endif - -#if GFX_VER >= 6 - .MOCS = brw_mocs(&brw->isl_dev, bo), -#endif - -#if GFX_VER < 8 - .BufferAccessType = step_rate ? 
INSTANCEDATA : VERTEXDATA,
-      .InstanceDataStepRate = step_rate,
-#if GFX_VER >= 5
-      .EndAddress = ro_bo(bo, end_offset - 1),
-#endif
-#endif
-   };
-
-   GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state);
-   return dw + GENX(VERTEX_BUFFER_STATE_length);
-}
-
-UNUSED static bool
-is_passthru_format(uint32_t format)
-{
-   switch (format) {
-   case ISL_FORMAT_R64_PASSTHRU:
-   case ISL_FORMAT_R64G64_PASSTHRU:
-   case ISL_FORMAT_R64G64B64_PASSTHRU:
-   case ISL_FORMAT_R64G64B64A64_PASSTHRU:
-      return true;
-   default:
-      return false;
-   }
-}
-
-UNUSED static int
-uploads_needed(uint32_t format,
-               bool is_dual_slot)
-{
-   if (!is_passthru_format(format))
-      return 1;
-
-   if (is_dual_slot)
-      return 2;
-
-   switch (format) {
-   case ISL_FORMAT_R64_PASSTHRU:
-   case ISL_FORMAT_R64G64_PASSTHRU:
-      return 1;
-   case ISL_FORMAT_R64G64B64_PASSTHRU:
-   case ISL_FORMAT_R64G64B64A64_PASSTHRU:
-      return 2;
-   default:
-      unreachable("not reached");
-   }
-}
-
-/*
- * Returns the format that we are finally going to use when uploading a vertex
- * element. It will only change if we are using *64*PASSTHRU formats, as for
- * gen < 8 they need to be split into two *32*FLOAT formats.
- *
- * @upload indicates which upload we are in. Valid values are [0,1].
- */
-static uint32_t
-downsize_format_if_needed(uint32_t format,
-                          int upload)
-{
-   assert(upload == 0 || upload == 1);
-
-   if (!is_passthru_format(format))
-      return format;
-
-   /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload ==
-    * 1 mean that we have been forced to do 2 uploads for a size <= 2. This
-    * happens with gen < 8 and dvec3 or dvec4 vertex shader input
-    * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of
-    * flagging that we want to fill with zeroes this second forced upload.
-    */
-   switch (format) {
-   case ISL_FORMAT_R64_PASSTHRU:
-      return upload == 0 ? ISL_FORMAT_R32G32_FLOAT
-                         : ISL_FORMAT_R32_FLOAT;
-   case ISL_FORMAT_R64G64_PASSTHRU:
-      return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
-                         : ISL_FORMAT_R32_FLOAT;
-   case ISL_FORMAT_R64G64B64_PASSTHRU:
-      return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
-                         : ISL_FORMAT_R32G32_FLOAT;
-   case ISL_FORMAT_R64G64B64A64_PASSTHRU:
-      return ISL_FORMAT_R32G32B32A32_FLOAT;
-   default:
-      unreachable("not reached");
-   }
-}
-
-/*
- * Returns the number of components associated with a format that is used in
- * a 64-to-32 format split. See downsize_format_if_needed().
- */
-static int
-upload_format_size(uint32_t upload_format)
-{
-   switch (upload_format) {
-   case ISL_FORMAT_R32_FLOAT:
-
-      /* downsize_format_if_needed() has returned this one in order to flag
-       * that we are performing a second upload which we want to have filled
-       * with zeroes. This happens with gen < 8, a size <= 2, and dvec3 or
-       * dvec4 vertex shader input variables.
-       */
-
-      return 0;
-   case ISL_FORMAT_R32G32_FLOAT:
-      return 2;
-   case ISL_FORMAT_R32G32B32A32_FLOAT:
-      return 4;
-   default:
-      unreachable("not reached");
-   }
-}
-
-static UNUSED uint16_t
-pinned_bo_high_bits(struct brw_bo *bo)
-{
-   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
-}
-
-/* The VF cache designers apparently cut corners, and made the cache key's
- * tuple only consider the bottom 32 bits
- * of the address. If you happen to have two vertex buffers which get placed
- * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
- * collisions. (These collisions can happen within a single batch.)
- *
- * In the soft-pin world, we'd like to assign addresses up front, and never
- * move buffers.
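To make the aliasing concrete, here are two hypothetical buffer addresses exactly 4 GiB apart: their low 32 bits (the VF cache key) collide, while bits [47:32] — what pinned_bo_high_bits() above extracts — differ, which is exactly the transition the helpers below detect:

```c
#include <assert.h>
#include <stdint.h>

static void
vf_cache_alias_example(void)
{
   const uint64_t bo_a = 0x100001000ull;   /* bits [47:32] = 0x0001 */
   const uint64_t bo_b = 0x200001000ull;   /* bits [47:32] = 0x0002 */

   assert((uint32_t)bo_a == (uint32_t)bo_b);   /* same VF cache key */
   assert((bo_a >> 32) != (bo_b >> 32));       /* transition => invalidate */
}
```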
So, we need to do a VF cache invalidate if the buffer for - * a particular VB slot has different [48:32] address bits than the last one. - * - * In the relocation world, we have no idea what the addresses will be, so - * we can't apply this workaround. Instead, we tell the kernel to move it - * to the low 4GB regardless. - * - * This HW issue is gone on Gfx11+. - */ -static void -vf_invalidate_for_vb_48bit_transitions(UNUSED struct brw_context *brw) -{ -#if GFX_VER >= 8 && GFX_VER < 11 - bool need_invalidate = false; - - for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { - uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo); - - if (high_bits != brw->vb.last_bo_high_bits[i]) { - need_invalidate = true; - brw->vb.last_bo_high_bits[i] = high_bits; - } - } - - if (brw->draw.draw_params_bo) { - uint16_t high_bits = pinned_bo_high_bits(brw->draw.draw_params_bo); - - if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers] != high_bits) { - need_invalidate = true; - brw->vb.last_bo_high_bits[brw->vb.nr_buffers] = high_bits; - } - } - - if (brw->draw.derived_draw_params_bo) { - uint16_t high_bits = pinned_bo_high_bits(brw->draw.derived_draw_params_bo); - - if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] != high_bits) { - need_invalidate = true; - brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] = high_bits; - } - } - - if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); - } -#endif -} - -static void -vf_invalidate_for_ib_48bit_transition(UNUSED struct brw_context *brw) -{ -#if GFX_VER >= 8 - uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo); - - if (high_bits != brw->ib.last_bo_high_bits) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); - brw->ib.last_bo_high_bits = high_bits; - } -#endif -} - -static void -genX(emit_vertices)(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t *dw; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - -#if GFX_VER < 6 - brw_emit_query_begin(brw); -#endif - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - -#if GFX_VER >= 8 - struct gl_context *ctx = &brw->ctx; - const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { - unsigned vue = brw->vb.nr_enabled; - - /* The element for the edge flags must always be last, so we have to - * insert the SGVS before it in that case. 
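The placement rule just described amounts to the following; a sketch with assumed counts (the helper is hypothetical):

```c
#include <stdbool.h>

static unsigned
sgvs_element_index(unsigned nr_enabled, bool uses_edge_flag)
{
   /* The edge flag element must stay last, so VertexID/InstanceID are
    * inserted just before it; e.g. sgvs_element_index(4, true) == 3. */
   return uses_edge_flag ? nr_enabled - 1 : nr_enabled;
}
```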
- */
-      if (uses_edge_flag) {
-         assert(vue > 0);
-         vue--;
-      }
-
-      WARN_ONCE(vue >= 33,
-                "Trying to insert VID/IID past 33rd vertex element, "
-                "need to reorder the vertex attributes.");
-
-      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) {
-         if (vs_prog_data->uses_vertexid) {
-            vfs.VertexIDEnable = true;
-            vfs.VertexIDComponentNumber = 2;
-            vfs.VertexIDElementOffset = vue;
-         }
-
-         if (vs_prog_data->uses_instanceid) {
-            vfs.InstanceIDEnable = true;
-            vfs.InstanceIDComponentNumber = 3;
-            vfs.InstanceIDElementOffset = vue;
-         }
-      }
-
-      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
-         vfi.InstancingEnable = true;
-         vfi.VertexElementIndex = vue;
-      }
-   } else {
-      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
-   }
-#endif
-
-   const bool uses_draw_params =
-      vs_prog_data->uses_firstvertex ||
-      vs_prog_data->uses_baseinstance;
-
-   const bool uses_derived_draw_params =
-      vs_prog_data->uses_drawid ||
-      vs_prog_data->uses_is_indexed_draw;
-
-   const bool needs_sgvs_element = (uses_draw_params ||
-                                    vs_prog_data->uses_instanceid ||
-                                    vs_prog_data->uses_vertexid);
-
-   unsigned nr_elements =
-      brw->vb.nr_enabled + needs_sgvs_element + uses_derived_draw_params;
-
-#if GFX_VER < 8
-   /* If any of the formats of vb.enabled needs more than one upload, we need
-    * to add it to nr_elements
-    */
-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
-      uint32_t format = brw_get_vertex_surface_type(brw, input->glformat);
-
-      if (uploads_needed(format, input->is_dual_slot) > 1)
-         nr_elements++;
-   }
-#endif
-
-   /* If the VS doesn't read any inputs (calculating vertex position from
-    * a state variable for some reason, for example), emit a single pad
-    * VERTEX_ELEMENT struct and bail.
-    *
-    * The stale VB state stays in place, but it doesn't do anything unless
-    * a VE loads from it.
-    */
-   if (nr_elements == 0) {
-      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
-                           1 + GENX(VERTEX_ELEMENT_STATE_length));
-      struct GENX(VERTEX_ELEMENT_STATE) elem = {
-         .Valid = true,
-         .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
-         .Component0Control = VFCOMP_STORE_0,
-         .Component1Control = VFCOMP_STORE_0,
-         .Component2Control = VFCOMP_STORE_0,
-         .Component3Control = VFCOMP_STORE_1_FP,
-      };
-      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem);
-      return;
-   }
-
-   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
-   const unsigned nr_buffers = brw->vb.nr_buffers +
-      uses_draw_params + uses_derived_draw_params;
-
-   vf_invalidate_for_vb_48bit_transitions(brw);
-
-   if (nr_buffers) {
-      assert(nr_buffers <= (GFX_VER >= 6 ? 33 : 17));
-
-      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
-                           1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers);
-
-      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
-         const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
-         /* Prior to Haswell and Bay Trail we have to use 4-component formats
-          * to fake 3-component ones. In particular, we do this for
-          * half-float and 8 and 16-bit integer formats. This means that the
-          * vertex element may poke over the end of the buffer by 2 bytes.
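A worked example of that 2-byte overrun: a 3-component half-float attribute occupies 6 bytes in the buffer, but the hardware fetches it as a faked 4-component (8-byte) value:

```c
static unsigned
vec3_half_float_overrun(void)
{
   const unsigned comp_bytes = 2;               /* 16-bit component */
   const unsigned api_bytes  = 3 * comp_bytes;  /* vec3 as stored: 6 */
   const unsigned hw_bytes   = 4 * comp_bytes;  /* fetched as vec4: 8 */

   return hw_bytes - api_bytes;   /* 2 -- the padding added just below */
}
```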
- */
-         const unsigned padding =
-            (GFX_VERx10 < 75 && devinfo->platform != INTEL_PLATFORM_BYT) * 2;
-         const unsigned end = buffer->offset + buffer->size + padding;
-         dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
-                                             buffer->offset,
-                                             end,
-                                             buffer->stride,
-                                             buffer->step_rate);
-      }
-
-      if (uses_draw_params) {
-         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers,
-                                             brw->draw.draw_params_bo,
-                                             brw->draw.draw_params_offset,
-                                             brw->draw.draw_params_bo->size,
-                                             0 /* stride */,
-                                             0 /* step rate */);
-      }
-
-      if (uses_derived_draw_params) {
-         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
-                                             brw->draw.derived_draw_params_bo,
-                                             brw->draw.derived_draw_params_offset,
-                                             brw->draw.derived_draw_params_bo->size,
-                                             0 /* stride */,
-                                             0 /* step rate */);
-      }
-   }
-
-   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
-    * presumably for VertexID/InstanceID.
-    */
-#if GFX_VER >= 6
-   assert(nr_elements <= 34);
-   const struct brw_vertex_element *gfx6_edgeflag_input = NULL;
-#else
-   assert(nr_elements <= 18);
-#endif
-
-   dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
-                        1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
-   unsigned i;
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      const struct brw_vertex_element *input = brw->vb.enabled[i];
-      const struct gl_vertex_format *glformat = input->glformat;
-      uint32_t format = brw_get_vertex_surface_type(brw, glformat);
-      uint32_t comp0 = VFCOMP_STORE_SRC;
-      uint32_t comp1 = VFCOMP_STORE_SRC;
-      uint32_t comp2 = VFCOMP_STORE_SRC;
-      uint32_t comp3 = VFCOMP_STORE_SRC;
-      const unsigned num_uploads = GFX_VER < 8 ?
-         uploads_needed(format, input->is_dual_slot) : 1;
-
-#if GFX_VER >= 8
-      /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
-       * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
-       * element which has edge flag enabled."
-       */
-      assert(!(is_passthru_format(format) && uses_edge_flag));
-#endif
-
-      /* The gfx4 driver expects edgeflag to come in as a float, and passes
-       * that float on to the tests in the clipper. Mesa's current vertex
-       * attribute value for EdgeFlag is stored as a float, which works out.
-       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
-       * integer ubyte. Just rewrite that to convert to a float.
-       *
-       * Gfx6+ passes edgeflag as sideband along with the vertex, instead
-       * of in the VUE. We have to upload it sideband as the last vertex
-       * element according to the B-Spec.
-       */
-#if GFX_VER >= 6
-      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
-         gfx6_edgeflag_input = input;
-         continue;
-      }
-#endif
-
-      for (unsigned c = 0; c < num_uploads; c++) {
-         const uint32_t upload_format = GFX_VER >= 8 ? format :
-            downsize_format_if_needed(format, c);
-         /* If we need more than one upload, the offset stride is 128 bits
-          * (16 bytes), as previous uploads consume the full entry. */
-         const unsigned offset = input->offset + c * 16;
-
-         const int size = (GFX_VER < 8 && is_passthru_format(format)) ?
- upload_format_size(upload_format) : glformat->Size; - - switch (size) { - case 0: comp0 = VFCOMP_STORE_0; FALLTHROUGH; - case 1: comp1 = VFCOMP_STORE_0; FALLTHROUGH; - case 2: comp2 = VFCOMP_STORE_0; FALLTHROUGH; - case 3: - if (GFX_VER >= 8 && glformat->Doubles) { - comp3 = VFCOMP_STORE_0; - } else if (glformat->Integer) { - comp3 = VFCOMP_STORE_1_INT; - } else { - comp3 = VFCOMP_STORE_1_FP; - } - - break; - } - -#if GFX_VER >= 8 - /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): - * - * "When SourceElementFormat is set to one of the *64*_PASSTHRU - * formats, 64-bit components are stored in the URB without any - * conversion. In this case, vertex elements must be written as 128 - * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as - * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red - * component into the URB, Component 1 must be specified as - * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in - * order to output a 128-bit vertex element, or Components 1-3 must - * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 - * to be specified as VFCOMP_STORE_0 in order to output a 256-bit - * vertex element." - */ - if (glformat->Doubles && !input->is_dual_slot) { - /* Store vertex elements which correspond to double and dvec2 vertex - * shader inputs as 128-bit vertex elements, instead of 256-bits. - */ - comp2 = VFCOMP_NOSTORE; - comp3 = VFCOMP_NOSTORE; - } -#endif - - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .VertexBufferIndex = input->buffer, - .Valid = true, - .SourceElementFormat = upload_format, - .SourceElementOffset = offset, - .Component0Control = comp0, - .Component1Control = comp1, - .Component2Control = comp2, - .Component3Control = comp3, -#if GFX_VER < 5 - .DestinationElementOffset = i * 4, -#endif - }; - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - } - - if (needs_sgvs_element) { - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .Valid = true, - .Component0Control = VFCOMP_STORE_0, - .Component1Control = VFCOMP_STORE_0, - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, -#if GFX_VER < 5 - .DestinationElementOffset = i * 4, -#endif - }; - -#if GFX_VER >= 8 - if (uses_draw_params) { - elem_state.VertexBufferIndex = brw->vb.nr_buffers; - elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; - elem_state.Component0Control = VFCOMP_STORE_SRC; - elem_state.Component1Control = VFCOMP_STORE_SRC; - } -#else - elem_state.VertexBufferIndex = brw->vb.nr_buffers; - elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; - if (uses_draw_params) { - elem_state.Component0Control = VFCOMP_STORE_SRC; - elem_state.Component1Control = VFCOMP_STORE_SRC; - } - - if (vs_prog_data->uses_vertexid) - elem_state.Component2Control = VFCOMP_STORE_VID; - - if (vs_prog_data->uses_instanceid) - elem_state.Component3Control = VFCOMP_STORE_IID; -#endif - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - - if (uses_derived_draw_params) { - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .Valid = true, - .VertexBufferIndex = brw->vb.nr_buffers + 1, - .SourceElementFormat = ISL_FORMAT_R32G32_UINT, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = VFCOMP_STORE_SRC, - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, -#if GFX_VER < 5 - .DestinationElementOffset = i * 4, 
-#endif - }; - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - -#if GFX_VER >= 6 - if (gfx6_edgeflag_input) { - const struct gl_vertex_format *glformat = gfx6_edgeflag_input->glformat; - const uint32_t format = brw_get_vertex_surface_type(brw, glformat); - - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .Valid = true, - .VertexBufferIndex = gfx6_edgeflag_input->buffer, - .EdgeFlagEnable = true, - .SourceElementFormat = format, - .SourceElementOffset = gfx6_edgeflag_input->offset, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = VFCOMP_STORE_0, - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, - }; - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } -#endif - -#if GFX_VER >= 8 - for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { - const struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; - unsigned element_index; - - /* The edge flag element is reordered to be the last one in the code - * above so we need to compensate for that in the element indices used - * below. - */ - if (input == gfx6_edgeflag_input) - element_index = nr_elements - 1; - else - element_index = j++; - - brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { - vfi.VertexElementIndex = element_index; - vfi.InstancingEnable = buffer->step_rate != 0; - vfi.InstanceDataStepRate = buffer->step_rate; - } - } - - if (vs_prog_data->uses_drawid) { - const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; - - brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { - vfi.VertexElementIndex = element; - } - } -#endif -} - -static const struct brw_tracked_state genX(vertices) = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = genX(emit_vertices), -}; - -static void -genX(emit_index_buffer)(struct brw_context *brw) -{ - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - - if (index_buffer == NULL) - return; - - vf_invalidate_for_ib_48bit_transition(brw); - - brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) { -#if GFX_VERx10 < 75 - assert(brw->ib.enable_cut_index == brw->prim_restart.enable_cut_index); - ib.CutIndexEnable = brw->ib.enable_cut_index; -#endif - ib.IndexFormat = brw_get_index_type(1 << index_buffer->index_size_shift); - -#if GFX_VER >= 6 - ib.MOCS = brw_mocs(&brw->isl_dev, brw->ib.bo); -#endif - - /* The VF cache designers apparently cut corners, and made the cache - * only consider the bottom 32 bits of memory addresses. If you happen - * to have two index buffers which get placed exactly 4 GiB apart and - * use them in back-to-back draw calls, you can get collisions. To work - * around this problem, we restrict index buffers to the low 32 bits of - * the address space. 
- */ - ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0); -#if GFX_VER >= 8 - ib.BufferSize = brw->ib.size; -#else - ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1); -#endif - } -} - -static const struct brw_tracked_state genX(index_buffer) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_INDEX_BUFFER, - }, - .emit = genX(emit_index_buffer), -}; - -#if GFX_VERx10 >= 75 -static void -genX(upload_cut_index)(struct brw_context *brw) -{ - brw_batch_emit(brw, GENX(3DSTATE_VF), vf) { - if (brw->prim_restart.enable_cut_index && brw->ib.ib) { - vf.IndexedDrawCutIndexEnable = true; - vf.CutIndex = brw->prim_restart.restart_index; - } - } -} - -const struct brw_tracked_state genX(cut_index) = { - .dirty = { - .mesa = _NEW_TRANSFORM, - .brw = BRW_NEW_INDEX_BUFFER, - }, - .emit = genX(upload_cut_index), -}; -#endif - -static void -genX(upload_vf_statistics)(struct brw_context *brw) -{ - brw_batch_emit(brw, GENX(3DSTATE_VF_STATISTICS), vf) { - vf.StatisticsEnable = true; - } -} - -const struct brw_tracked_state genX(vf_statistics) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BLORP | BRW_NEW_CONTEXT, - }, - .emit = genX(upload_vf_statistics), -}; - -#if GFX_VER >= 6 -/** - * Determine the appropriate attribute override value to store into the - * 3DSTATE_SF structure for a given fragment shader attribute. The attribute - * override value contains two pieces of information: the location of the - * attribute in the VUE (relative to urb_entry_read_offset, see below), and a - * flag indicating whether to "swizzle" the attribute based on the direction - * the triangle is facing. - * - * If an attribute is "swizzled", then the given VUE location is used for - * front-facing triangles, and the VUE location that immediately follows is - * used for back-facing triangles. We use this to implement the mapping from - * gl_FrontColor/gl_BackColor to gl_Color. - * - * urb_entry_read_offset is the offset into the VUE at which the SF unit is - * being instructed to begin reading attribute data. It can be set to a - * nonzero value to prevent the SF unit from wasting time reading elements of - * the VUE that are not needed by the fragment shader. It is measured in - * 256-bit increments. - */ -static void -genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr, - const struct brw_vue_map *vue_map, - int urb_entry_read_offset, int fs_attr, - bool two_side_color, uint32_t *max_source_attr) -{ - /* Find the VUE slot for this attribute. */ - int slot = vue_map->varying_to_slot[fs_attr]; - - /* Viewport and Layer are stored in the VUE header. We need to override - * them to zero if earlier stages didn't write them, as GL requires that - * they read back as zero when not explicitly set. 
- */ - if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { - attr->ComponentOverrideX = true; - attr->ComponentOverrideW = true; - attr->ConstantSource = CONST_0000; - - if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) - attr->ComponentOverrideY = true; - if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) - attr->ComponentOverrideZ = true; - - return; - } - - /* If there was only a back color written but not front, use back - * as the color instead of undefined - */ - if (slot == -1 && fs_attr == VARYING_SLOT_COL0) - slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; - if (slot == -1 && fs_attr == VARYING_SLOT_COL1) - slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; - - if (slot == -1) { - /* This attribute does not exist in the VUE--that means that the vertex - * shader did not write to it. This means that either: - * - * (a) This attribute is a texture coordinate, and it is going to be - * replaced with point coordinates (as a consequence of a call to - * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the - * hardware will ignore whatever attribute override we supply. - * - * (b) This attribute is read by the fragment shader but not written by - * the vertex shader, so its value is undefined. Therefore the - * attribute override we supply doesn't matter. - * - * (c) This attribute is gl_PrimitiveID, and it wasn't written by the - * previous shader stage. - * - * Note that we don't have to worry about the cases where the attribute - * is gl_PointCoord or is undergoing point sprite coordinate - * replacement, because in those cases, this function isn't called. - * - * In case (c), we need to program the attribute overrides so that the - * primitive ID will be stored in this slot. In every other case, the - * attribute override we supply doesn't matter. So just go ahead and - * program primitive ID in every case. - */ - attr->ComponentOverrideW = true; - attr->ComponentOverrideX = true; - attr->ComponentOverrideY = true; - attr->ComponentOverrideZ = true; - attr->ConstantSource = PRIM_ID; - return; - } - - /* Compute the location of the attribute relative to urb_entry_read_offset. - * Each increment of urb_entry_read_offset represents a 256-bit value, so - * it counts for two 128-bit VUE slots. - */ - int source_attr = slot - 2 * urb_entry_read_offset; - assert(source_attr >= 0 && source_attr < 32); - - /* If we are doing two-sided color, and the VUE slot following this one - * represents a back-facing color, then we need to instruct the SF unit to - * do back-facing swizzling. - */ - bool swizzling = two_side_color && - ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && - vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || - (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && - vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)); - - /* Update max_source_attr. If swizzling, the SF will read this slot + 1. 
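A worked example with assumed values: if the first FS-required varying lives in VUE slot 2, then urb_entry_read_offset is 1 (256-bit units); with gl_Color (COL0) in slot 5 and gl_BackColor (BFC0) in slot 6, two-sided color swizzling applies:

```c
#include <stdbool.h>

static int
attr_override_example(void)
{
   const int urb_entry_read_offset = 1;   /* first_slot 2 / 2 */
   const int slot = 5;                    /* COL0, with BFC0 in slot 6 */
   const bool swizzling = true;           /* two-sided color enabled */

   const int source_attr = slot - 2 * urb_entry_read_offset;   /* 3 */
   return source_attr + swizzling;        /* max_source_attr must cover 4 */
}
```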
*/ - if (*max_source_attr < source_attr + swizzling) - *max_source_attr = source_attr + swizzling; - - attr->SourceAttribute = source_attr; - if (swizzling) - attr->SwizzleSelect = INPUTATTR_FACING; -} - - -static void -genX(calculate_attr_overrides)(const struct brw_context *brw, - struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides, - uint32_t *point_sprite_enables, - uint32_t *urb_entry_read_length, - uint32_t *urb_entry_read_offset) -{ - const struct gl_context *ctx = &brw->ctx; - - /* _NEW_POINT */ - const struct gl_point_attrib *point = &ctx->Point; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - uint32_t max_source_attr = 0; - - *point_sprite_enables = 0; - - int first_slot = - brw_compute_first_urb_slot_required(fp->info.inputs_read, - &brw->vue_map_geom_out); - - /* Each URB offset packs two varying slots */ - assert(first_slot % 2 == 0); - *urb_entry_read_offset = first_slot / 2; - - /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE, - * description of dw10 Point Sprite Texture Coordinate Enable: - * - * "This field must be programmed to zero when non-point primitives - * are rendered." - * - * The SandyBridge PRM doesn't explicitly say that point sprite enables - * must be programmed to zero when rendering non-point primitives, but - * the IvyBridge PRM does, and if we don't, we get garbage. - * - * This is not required on Haswell, as the hardware ignores this state - * when drawing non-points -- although we do still need to be careful to - * correctly set the attr overrides. - * - * _NEW_POLYGON - * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA - */ - bool drawing_points = brw_is_drawing_points(brw); - - for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) { - uint8_t attr = wm_prog_data->urb_setup_attribs[idx]; - int input_index = wm_prog_data->urb_setup[attr]; - - assert(0 <= input_index); - - /* _NEW_POINT */ - bool point_sprite = false; - if (drawing_points) { - if (point->PointSprite && - (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) && - (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) { - point_sprite = true; - } - - if (attr == VARYING_SLOT_PNTC) - point_sprite = true; - - if (point_sprite) - *point_sprite_enables |= (1 << input_index); - } - - /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ - struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 }; - - if (!point_sprite) { - genX(get_attr_override)(&attribute, - &brw->vue_map_geom_out, - *urb_entry_read_offset, attr, - _mesa_vertex_program_two_side_enabled(ctx), - &max_source_attr); - } - - /* The hardware can only do the overrides on 16 overrides at a - * time, and the other up to 16 have to be lined up so that the - * input index = the output index. We'll need to do some - * tweaking to make sure that's the case. - */ - if (input_index < 16) - attr_overrides[input_index] = attribute; - else - assert(attribute.SourceAttribute == input_index); - } - - /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for - * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": - * - * "This field should be set to the minimum length required to read the - * maximum source attribute. 
The maximum source attribute is indicated - * by the maximum value of the enabled Attribute # Source Attribute if - * Attribute Swizzle Enable is set, Number of Output Attributes-1 if - * enable is not set. - * read_length = ceiling((max_source_attr + 1) / 2) - * - * [errata] Corruption/Hang possible if length programmed larger than - * recommended" - * - * Similar text exists for Ivy Bridge. - */ - *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2); -} -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML; -#elif GFX_VER >= 6 -typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML; -#else -typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML; -#endif - -static inline void -set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - - /* _NEW_DEPTH */ - struct gl_depthbuffer_attrib *depth = &ctx->Depth; - - /* _NEW_STENCIL */ - struct gl_stencil_attrib *stencil = &ctx->Stencil; - const int b = stencil->_BackFace; - - if (depth->Test && depth_irb) { - ds->DepthTestEnable = true; - ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw); - ds->DepthTestFunction = brw_translate_compare_func(depth->Func); - } - - if (brw->stencil_enabled) { - ds->StencilTestEnable = true; - ds->StencilWriteMask = stencil->WriteMask[0] & 0xff; - ds->StencilTestMask = stencil->ValueMask[0] & 0xff; - - ds->StencilTestFunction = - brw_translate_compare_func(stencil->Function[0]); - ds->StencilFailOp = - brw_translate_stencil_op(stencil->FailFunc[0]); - ds->StencilPassDepthPassOp = - brw_translate_stencil_op(stencil->ZPassFunc[0]); - ds->StencilPassDepthFailOp = - brw_translate_stencil_op(stencil->ZFailFunc[0]); - - ds->StencilBufferWriteEnable = brw->stencil_write_enabled; - - if (brw->stencil_two_sided) { - ds->DoubleSidedStencilEnable = true; - ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff; - ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff; - - ds->BackfaceStencilTestFunction = - brw_translate_compare_func(stencil->Function[b]); - ds->BackfaceStencilFailOp = - brw_translate_stencil_op(stencil->FailFunc[b]); - ds->BackfaceStencilPassDepthPassOp = - brw_translate_stencil_op(stencil->ZPassFunc[b]); - ds->BackfaceStencilPassDepthFailOp = - brw_translate_stencil_op(stencil->ZFailFunc[b]); - } - -#if GFX_VER <= 5 || GFX_VER >= 9 - ds->StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0); - ds->BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b); -#endif - } -} - -#if GFX_VER >= 6 -static void -genX(upload_depth_stencil_state)(struct brw_context *brw) -{ -#if GFX_VER >= 8 - brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) { - set_depth_stencil_bits(brw, &wmds); - } -#else - uint32_t ds_offset; - brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, ds) { - set_depth_stencil_bits(brw, &ds); - } - - /* Now upload a pointer to the indirect state */ -#if GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { - ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; - ptr.DEPTH_STENCIL_STATEChange = true; - } -#else - brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) { - ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; - } -#endif -#endif -} - -static const struct brw_tracked_state genX(depth_stencil_state) = { - .dirty = { - 
.mesa = _NEW_BUFFERS | - _NEW_DEPTH | - _NEW_STENCIL, - .brw = BRW_NEW_BLORP | - (GFX_VER >= 8 ? BRW_NEW_CONTEXT - : BRW_NEW_BATCH | - BRW_NEW_STATE_BASE_ADDRESS), - }, - .emit = genX(upload_depth_stencil_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER <= 5 - -static void -genX(upload_clip_state)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) { - clip.KernelStartPointer = KSP(brw, brw->clip.prog_offset); - clip.GRFRegisterCount = - DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1; - clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - clip.SingleProgramFlow = true; - clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length; - clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length; - - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ - clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2; - clip.DispatchGRFStartRegisterForURBData = 1; - clip.VertexURBEntryReadOffset = 0; - - /* BRW_NEW_URB_FENCE */ - clip.NumberofURBEntries = brw->urb.nr_clip_entries; - clip.URBEntryAllocationSize = brw->urb.vsize - 1; - - if (brw->urb.nr_clip_entries >= 10) { - /* Half of the URB entries go to each thread, and it has to be an - * even number. - */ - assert(brw->urb.nr_clip_entries % 2 == 0); - - /* Although up to 16 concurrent Clip threads are allowed on Ironlake, - * only 2 threads can output VUEs at a time. - */ - clip.MaximumNumberofThreads = (GFX_VER == 5 ? 16 : 2) - 1; - } else { - assert(brw->urb.nr_clip_entries >= 5); - clip.MaximumNumberofThreads = 1 - 1; - } - - clip.VertexPositionSpace = VPOS_NDCSPACE; - clip.UserClipFlagsMustClipEnable = true; - clip.GuardbandClipTestEnable = true; - - clip.ClipperViewportStatePointer = - ro_bo(brw->batch.state.bo, brw->clip.vp_offset); - - clip.ScreenSpaceViewportXMin = -1; - clip.ScreenSpaceViewportXMax = 1; - clip.ScreenSpaceViewportYMin = -1; - clip.ScreenSpaceViewportYMax = 1; - - clip.ViewportXYClipTestEnable = true; - clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear && - ctx->Transform.DepthClampFar); - - /* _NEW_TRANSFORM */ - if (GFX_VER == 5 || GFX_VERx10 == 45) { - clip.UserClipDistanceClipTestEnableBitmask = - ctx->Transform.ClipPlanesEnabled; - } else { - /* Up to 6 actual clip flags, plus the 7th for the negative RHW - * workaround. 
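A worked example of the bitmask programmed just below, with hypothetical GL state (user clip planes 0 and 3 enabled):

```c
#include <stdint.h>

static uint32_t
gfx4_clip_bitmask_example(void)
{
   const uint32_t enabled = (1u << 0) | (1u << 3);   /* 0x09 */

   /* bits 0-5: user clip planes; bit 6: the negative-RHW plane */
   return (enabled & 0x3f) | 0x40;                   /* 0x49 */
}
```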
- */ - clip.UserClipDistanceClipTestEnableBitmask = - (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40; - } - - if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) - clip.APIMode = APIMODE_D3D; - else - clip.APIMode = APIMODE_OGL; - - clip.GuardbandClipTestEnable = true; - - clip.ClipMode = brw->clip.prog_data->clip_mode; - -#if GFX_VERx10 == 45 - clip.NegativeWClipTestEnable = true; -#endif - } -} - -const struct brw_tracked_state genX(clip_state) = { - .dirty = { - .mesa = _NEW_TRANSFORM | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CLIP_PROG_DATA | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_URB_FENCE, - }, - .emit = genX(upload_clip_state), -}; - -#else - -static void -genX(upload_clip_state)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; - - /* BRW_NEW_FS_PROG_DATA */ - struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) { - clip.StatisticsEnable = !brw->meta_in_progress; - - if (wm_prog_data->barycentric_interp_modes & - BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - clip.NonPerspectiveBarycentricEnable = true; - -#if GFX_VER >= 7 - clip.EarlyCullEnable = true; -#endif - -#if GFX_VER == 7 - clip.FrontWinding = brw->polygon_front_bit != fb->FlipY; - - if (ctx->Polygon.CullFlag) { - switch (ctx->Polygon.CullFaceMode) { - case GL_FRONT: - clip.CullMode = CULLMODE_FRONT; - break; - case GL_BACK: - clip.CullMode = CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - clip.CullMode = CULLMODE_BOTH; - break; - default: - unreachable("Should not get here: invalid CullFlag"); - } - } else { - clip.CullMode = CULLMODE_NONE; - } -#endif - -#if GFX_VER < 8 - clip.UserClipDistanceCullTestEnableBitmask = - brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask; - - clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear && - ctx->Transform.DepthClampFar); -#endif - - /* _NEW_LIGHT */ - if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { - clip.TriangleStripListProvokingVertexSelect = 0; - clip.TriangleFanProvokingVertexSelect = 1; - clip.LineStripListProvokingVertexSelect = 0; - } else { - clip.TriangleStripListProvokingVertexSelect = 2; - clip.TriangleFanProvokingVertexSelect = 2; - clip.LineStripListProvokingVertexSelect = 1; - } - - /* _NEW_TRANSFORM */ - clip.UserClipDistanceClipTestEnableBitmask = - ctx->Transform.ClipPlanesEnabled; - -#if GFX_VER >= 8 - clip.ForceUserClipDistanceClipTestEnableBitmask = true; -#endif - - if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) - clip.APIMode = APIMODE_D3D; - else - clip.APIMode = APIMODE_OGL; - - clip.GuardbandClipTestEnable = true; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - if (ctx->RasterDiscard) { - clip.ClipMode = CLIPMODE_REJECT_ALL; -#if GFX_VER == 6 - perf_debug("Rasterizer discard is currently implemented via the " - "clipper; having the GS not write primitives would " - "likely be faster.\n"); -#endif - } else { - clip.ClipMode = CLIPMODE_NORMAL; - } - - clip.ClipEnable = true; - - /* _NEW_POLYGON, - * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE - */ - if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw)) - clip.ViewportXYClipTestEnable = true; - - clip.MinimumPointWidth = 0.125; - clip.MaximumPointWidth = 255.875; - clip.MaximumVPIndex = viewport_count - 1; - if (_mesa_geometric_layers(fb) == 0) - 
clip.ForceZeroRTAIndexEnable = true; - } -} - -static const struct brw_tracked_state genX(clip_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POLYGON | - _NEW_TRANSFORM, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_VS_PROG_DATA | - BRW_NEW_META_IN_PROGRESS | - BRW_NEW_PRIMITIVE | - BRW_NEW_RASTERIZER_DISCARD | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_clip_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_sf)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - float point_size; - -#if GFX_VER <= 7 - /* _NEW_BUFFERS */ - bool flip_y = ctx->DrawBuffer->FlipY; - UNUSED const bool multisampled_fbo = - _mesa_geometric_samples(ctx->DrawBuffer) > 1; -#endif - -#if GFX_VER < 6 - const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data; - - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - - brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) { - sf.KernelStartPointer = KSP(brw, brw->sf.prog_offset); - sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1; - sf.DispatchGRFStartRegisterForURBData = 3; - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; - sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length; - sf.NumberofURBEntries = brw->urb.nr_sf_entries; - sf.URBEntryAllocationSize = brw->urb.sfsize - 1; - - /* STATE_PREFETCH command description describes this state as being - * something loaded through the GPE (L2 ISC), so it's INSTRUCTION - * domain. - */ - sf.SetupViewportStateOffset = - ro_bo(brw->batch.state.bo, brw->sf.vp_offset); - - sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT; - - /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */ - /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */ - - sf.MaximumNumberofThreads = - MIN2(GFX_VER == 5 ? 
48 : 24, brw->urb.nr_sf_entries) - 1; - - sf.SpritePointEnable = ctx->Point.PointSprite; - - sf.DestinationOriginHorizontalBias = 0.5; - sf.DestinationOriginVerticalBias = 0.5; -#else - brw_batch_emit(brw, GENX(3DSTATE_SF), sf) { - sf.StatisticsEnable = true; -#endif - sf.ViewportTransformEnable = true; - -#if GFX_VER == 7 - /* _NEW_BUFFERS */ - sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw); -#endif - -#if GFX_VER <= 7 - /* _NEW_POLYGON */ - sf.FrontWinding = brw->polygon_front_bit != flip_y; -#if GFX_VER >= 6 - sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill; - sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine; - sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint; - - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - sf.FrontFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - sf.FrontFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - sf.FrontFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - switch (ctx->Polygon.BackMode) { - case GL_FILL: - sf.BackFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - sf.BackFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - sf.BackFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - if (multisampled_fbo && ctx->Multisample.Enabled) - sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - - sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; - sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor; - sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; -#endif - - sf.ScissorRectangleEnable = true; - - if (ctx->Polygon.CullFlag) { - switch (ctx->Polygon.CullFaceMode) { - case GL_FRONT: - sf.CullMode = CULLMODE_FRONT; - break; - case GL_BACK: - sf.CullMode = CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - sf.CullMode = CULLMODE_BOTH; - break; - default: - unreachable("not reached"); - } - } else { - sf.CullMode = CULLMODE_NONE; - } - -#if GFX_VERx10 == 75 - sf.LineStippleEnable = ctx->Line.StippleFlag; -#endif - -#endif - - /* _NEW_LINE */ -#if GFX_VER == 8 - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->platform == INTEL_PLATFORM_CHV) - sf.CHVLineWidth = brw_get_line_width(brw); - else - sf.LineWidth = brw_get_line_width(brw); -#else - sf.LineWidth = brw_get_line_width(brw); -#endif - - if (ctx->Line.SmoothFlag) { - sf.LineEndCapAntialiasingRegionWidth = _10pixels; -#if GFX_VER <= 7 - sf.AntialiasingEnable = true; -#endif - } - - /* _NEW_POINT - Clamp to ARB_point_parameters user limits */ - point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - /* Clamp to the hardware limits */ - sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f); - - /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */ - if (use_state_point_size(brw)) - sf.PointWidthSource = State; - -#if GFX_VER >= 8 - /* _NEW_POINT | _NEW_MULTISAMPLE */ - if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) && - !ctx->Point.PointSprite) - sf.SmoothPointEnable = true; -#endif - -#if GFX_VER == 10 - /* _NEW_BUFFERS - * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1. 
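For reference, the point width programmed above is the result of a two-stage clamp; a worked example using Mesa's CLAMP macro (from main/macros.h), with assumed ARB_point_parameters limits of MinSize = 1.0 and MaxSize = 64.0:

```c
static float
point_width_example(void)
{
   const float api_size = 300.0f;                    /* glPointSize value */
   const float user = CLAMP(api_size, 1.0f, 64.0f);  /* user limits: 64.0 */

   return CLAMP(user, 0.125f, 255.875f);             /* hardware range: 64.0 */
}
```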
- */ - const bool multisampled_fbo = - _mesa_geometric_samples(ctx->DrawBuffer) > 1; - if (multisampled_fbo) - sf.SmoothPointEnable = false; -#endif - -#if GFX_VERx10 >= 45 - sf.AALineDistanceMode = AALINEDISTANCE_TRUE; -#endif - - /* _NEW_LIGHT */ - if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { - sf.TriangleStripListProvokingVertexSelect = 2; - sf.TriangleFanProvokingVertexSelect = 2; - sf.LineStripListProvokingVertexSelect = 1; - } else { - sf.TriangleFanProvokingVertexSelect = 1; - } - -#if GFX_VER == 6 - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - sf.AttributeSwizzleEnable = true; - sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; - - /* - * Window coordinates in an FBO are inverted, which means point - * sprite origin must be inverted, too. - */ - if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) { - sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT; - } else { - sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT; - } - - /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM | - * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA - */ - uint32_t urb_entry_read_length; - uint32_t urb_entry_read_offset; - uint32_t point_sprite_enables; - genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables, - &urb_entry_read_length, - &urb_entry_read_offset); - sf.VertexURBEntryReadLength = urb_entry_read_length; - sf.VertexURBEntryReadOffset = urb_entry_read_offset; - sf.PointSpriteTextureCoordinateEnable = point_sprite_enables; - sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs; -#endif - } -} - -static const struct brw_tracked_state genX(sf_state) = { - .dirty = { - .mesa = _NEW_LIGHT | - _NEW_LINE | - _NEW_POINT | - _NEW_PROGRAM | - (GFX_VER >= 6 ? _NEW_MULTISAMPLE : 0) | - (GFX_VER <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) | - (GFX_VER == 10 ? _NEW_BUFFERS : 0), - .brw = BRW_NEW_BLORP | - BRW_NEW_VUE_MAP_GEOM_OUT | - (GFX_VER <= 5 ? BRW_NEW_BATCH | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SF_PROG_DATA | - BRW_NEW_SF_VP | - BRW_NEW_URB_FENCE - : 0) | - (GFX_VER >= 6 ? BRW_NEW_CONTEXT : 0) | - (GFX_VER >= 6 && GFX_VER <= 7 ? - BRW_NEW_GS_PROG_DATA | - BRW_NEW_PRIMITIVE | - BRW_NEW_TES_PROG_DATA - : 0) | - (GFX_VER == 6 ? 
BRW_NEW_FS_PROG_DATA | - BRW_NEW_FRAGMENT_PROGRAM - : 0), - }, - .emit = genX(upload_sf), -}; - -/* ---------------------------------------------------------------------- */ - -static bool -brw_color_buffer_write_enabled(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - unsigned i; - - /* _NEW_BUFFERS */ - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - uint64_t outputs_written = fp->info.outputs_written; - - /* _NEW_COLOR */ - if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || - outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && - GET_COLORMASK(ctx->Color.ColorMask, i)) { - return true; - } - } - - return false; -} - -static void -genX(upload_wm)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - UNUSED bool writes_depth = - wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; - UNUSED struct brw_stage_state *stage_state = &brw->wm.base; - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - -#if GFX_VER == 6 - /* We can't fold this into gfx6_upload_wm_push_constants(), because - * according to the SNB PRM, vol 2 part 1 section 7.2.2 - * (3DSTATE_CONSTANT_PS [DevSNB]): - * - * "[DevSNB]: This packet must be followed by WM_STATE." - */ - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_PS), wmcp) { - if (wm_prog_data->base.nr_params != 0) { - wmcp.Buffer0Valid = true; - /* Pointer to the WM constant buffer. Covered by the set of - * state flags from gfx6_upload_wm_push_constants. 
- */ - wmcp.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset; - wmcp.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1; - } - } -#endif - -#if GFX_VER >= 6 - brw_batch_emit(brw, GENX(3DSTATE_WM), wm) { -#else - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) { -#endif - -#if GFX_VER <= 6 - wm._8PixelDispatchEnable = wm_prog_data->dispatch_8; - wm._16PixelDispatchEnable = wm_prog_data->dispatch_16; - wm._32PixelDispatchEnable = wm_prog_data->dispatch_32; -#endif - -#if GFX_VER == 4 - /* On gfx4, we only have one shader kernel */ - if (brw_wm_state_has_ksp(wm, 0)) { - assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0); - wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset); - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0); - wm.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0); - } -#elif GFX_VER == 5 - /* On gfx5, we have multiple shader kernels but only one GRF start - * register for all kernels - */ - wm.KernelStartPointer0 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0); - wm.KernelStartPointer1 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1); - wm.KernelStartPointer2 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2); - - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0); - wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1); - wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2); - - wm.DispatchGRFStartRegisterForConstantSetupData0 = - wm_prog_data->base.dispatch_grf_start_reg; - - /* Dispatch GRF Start should be the same for all shaders on gfx5 */ - if (brw_wm_state_has_ksp(wm, 1)) { - assert(wm_prog_data->base.dispatch_grf_start_reg == - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1)); - } - if (brw_wm_state_has_ksp(wm, 2)) { - assert(wm_prog_data->base.dispatch_grf_start_reg == - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2)); - } -#elif GFX_VER == 6 - /* On gfx6, we have multiple shader kernels and we no longer specify a - * register count for each one. 
- */
-      wm.KernelStartPointer0 = stage_state->prog_offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
-      wm.KernelStartPointer1 = stage_state->prog_offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
-      wm.KernelStartPointer2 = stage_state->prog_offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
-
-      wm.DispatchGRFStartRegisterForConstantSetupData0 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
-      wm.DispatchGRFStartRegisterForConstantSetupData1 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
-      wm.DispatchGRFStartRegisterForConstantSetupData2 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
-#endif
-
-#if GFX_VER <= 5
-      wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
-      /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-      wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
-      wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
-      wm.SetupURBEntryReadOffset = 0;
-      wm.EarlyDepthTestEnable = true;
-#endif
-
-#if GFX_VER >= 6
-      wm.LineAntialiasingRegionWidth = _10pixels;
-      wm.LineEndCapAntialiasingRegionWidth = _05pixels;
-
-      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
-      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
-#else
-      if (stage_state->sampler_count)
-         wm.SamplerStatePointer =
-            ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
-
-      wm.LineAntialiasingRegionWidth = _05pixels;
-      wm.LineEndCapAntialiasingRegionWidth = _10pixels;
-
-      /* _NEW_POLYGON */
-      if (ctx->Polygon.OffsetFill) {
-         wm.GlobalDepthOffsetEnable = true;
-         /* Something weird is going on with legacy_global_depth_bias,
-          * offset_constant, scaling and MRD. This value passes glean
-          * but gives some odd results elsewhere (e.g. the
-          * quad-offset-units test).
-          */
-         wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
-
-         /* This is the only value that passes glean:
-          */
-         wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
-      }
-
-      wm.DepthCoefficientURBReadOffset = 1;
-#endif
-
-      /* BRW_NEW_STATS_WM */
-      wm.StatisticsEnable = GFX_VER >= 6 || brw->stats_wm;
-
-#if GFX_VER < 7
-      if (wm_prog_data->base.use_alt_mode)
-         wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
-
-      wm.SamplerCount = GFX_VER == 5 ?
-         0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
-
-      wm.BindingTableEntryCount =
-         wm_prog_data->base.binding_table.size_bytes / 4;
-      wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-
-#if GFX_VER == 6
-      wm.DualSourceBlendEnable =
-         wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
-         ctx->Color._BlendUsesDualSrc & 0x1;
-      wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
-      wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
-
-      /* From the SNB PRM, volume 2 part 1, page 281:
-       * "If the PS kernel does not need the Position XY Offsets
-       * to compute a Position XY value, then this field should be
-       * programmed to POSOFFSET_NONE."
-       *
-       * "SW Recommendation: If the PS kernel needs the Position Offsets
-       * to compute a Position XY value, this field should match Position
-       * ZW Interpolation Mode to ensure a consistent position.xyzw
-       * computation."
-       * We only require XY sample offsets, so this recommendation doesn't
-       * look useful at the moment. We might need this in the future.
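As an aside on the scratch-space fields used throughout these packets: the
PerThreadScratchSpace = ffs(per_thread_scratch) - 11 idiom that appears just
below (and again in INIT_THREAD_DISPATCH_FIELDS later in this file) encodes
the per-thread scratch size as a power-of-two multiple of 1 KB. A minimal
standalone sketch of the arithmetic; the function name is illustrative, not
driver API, and it assumes the size is already a power of two of at least
1 KB, as the driver arranges before programming the field:

   #include <assert.h>
   #include <strings.h> /* ffs() */

   /* Illustrative only: ffs(1024) == 11, so 1 KB encodes as 0, 2 KB as 1,
    * 4 KB as 2, and so on; decoding is 1024u << field.
    */
   static unsigned encode_per_thread_scratch(unsigned per_thread_scratch)
   {
      /* Assumed precondition: a power of two, at least 1 KB. */
      assert(per_thread_scratch >= 1024 &&
             (per_thread_scratch & (per_thread_scratch - 1)) == 0);
      return ffs(per_thread_scratch) - 11;
   }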
- */ - if (wm_prog_data->uses_pos_offset) - wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE; - else - wm.PositionXYOffsetSelect = POSOFFSET_NONE; -#endif - - if (wm_prog_data->base.total_scratch) { - wm.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); - wm.PerThreadScratchSpace = - ffs(stage_state->per_thread_scratch) - 11; - } - - wm.PixelShaderComputedDepth = writes_depth; -#endif - - /* _NEW_LINE */ - wm.LineStippleEnable = ctx->Line.StippleFlag; - - /* _NEW_POLYGON */ - wm.PolygonStippleEnable = ctx->Polygon.StippleFlag; - -#if GFX_VER < 8 - -#if GFX_VER >= 6 - wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; - - /* _NEW_BUFFERS */ - const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - - if (multisampled_fbo) { - /* _NEW_MULTISAMPLE */ - if (ctx->Multisample.Enabled) - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - else - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - - if (wm_prog_data->persample_dispatch) - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - else - wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL; - } else { - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - } -#endif - wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; - if (wm_prog_data->uses_kill || - _mesa_is_alpha_test_enabled(ctx) || - _mesa_is_alpha_to_coverage_enabled(ctx) || - (GFX_VER >= 6 && wm_prog_data->uses_omask)) { - wm.PixelShaderKillsPixel = true; - } - - /* _NEW_BUFFERS | _NEW_COLOR */ - if (brw_color_buffer_write_enabled(brw) || writes_depth || - wm.PixelShaderKillsPixel || - (GFX_VER >= 6 && wm_prog_data->has_side_effects)) { - wm.ThreadDispatchEnable = true; - } - -#if GFX_VER >= 7 - wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; - wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; -#endif - - /* The "UAV access enable" bits are unnecessary on HSW because they only - * seem to have an effect on the HW-assisted coherency mechanism which we - * don't need, and the rasterization-related UAV_ONLY flag and the - * DISPATCH_ENABLE bit can be set independently from it. - * C.f. gfx8_upload_ps_extra(). - * - * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | - * _NEW_COLOR - */ -#if GFX_VERx10 == 75 - if (!(brw_color_buffer_write_enabled(brw) || writes_depth) && - wm_prog_data->has_side_effects) - wm.PSUAVonly = ON; -#endif -#endif - -#if GFX_VER >= 7 - /* BRW_NEW_FS_PROG_DATA */ - if (wm_prog_data->early_fragment_tests) - wm.EarlyDepthStencilControl = EDSC_PREPS; - else if (wm_prog_data->has_side_effects) - wm.EarlyDepthStencilControl = EDSC_PSEXEC; -#endif - } - -#if GFX_VER <= 5 - if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { - brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) { - clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; - } - - brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; - } -#endif -} - -static const struct brw_tracked_state genX(wm_state) = { - .dirty = { - .mesa = _NEW_LINE | - _NEW_POLYGON | - (GFX_VER < 8 ? _NEW_BUFFERS | - _NEW_COLOR : - 0) | - (GFX_VER == 6 ? _NEW_PROGRAM_CONSTANTS : 0) | - (GFX_VER < 6 ? _NEW_POLYGONSTIPPLE : 0) | - (GFX_VER < 8 && GFX_VER >= 6 ? _NEW_MULTISAMPLE : 0), - .brw = BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - (GFX_VER < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_STATS_WM - : 0) | - (GFX_VER < 7 ? 
BRW_NEW_BATCH : BRW_NEW_CONTEXT), - }, - .emit = genX(upload_wm), -}; - -/* ---------------------------------------------------------------------- */ - -/* We restrict scratch buffers to the bottom 32 bits of the address space - * by using rw_32_bo(). - * - * General State Base Address is a bit broken. If the address + size as - * seen by STATE_BASE_ADDRESS overflows 48 bits, the GPU appears to treat - * all accesses to the buffer as being out of bounds and returns zero. - */ - -#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ - pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ - /* Wa_1606682166 */ \ - pkt.SamplerCount = \ - GFX_VER == 11 ? \ - 0 : \ - DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ - pkt.BindingTableEntryCount = \ - stage_prog_data->binding_table.size_bytes / 4; \ - pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \ - \ - if (stage_prog_data->total_scratch) { \ - pkt.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); \ - pkt.PerThreadScratchSpace = \ - ffs(stage_state->per_thread_scratch) - 11; \ - } \ - \ - pkt.DispatchGRFStartRegisterForURBData = \ - stage_prog_data->dispatch_grf_start_reg; \ - pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ - pkt.prefix##URBEntryReadOffset = 0; \ - \ - pkt.StatisticsEnable = true; \ - pkt.Enable = true; - -static void -genX(upload_vs_state)(struct brw_context *brw) -{ - UNUSED struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_stage_state *stage_state = &brw->vs.base; - - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(brw->vs.base.prog_data); - const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base; - - assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 || - vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT); - assert(GFX_VER < 11 || - vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8); - -#if GFX_VER == 6 - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * - * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS - * command that causes the VS Function Enable to toggle. Pipeline - * flush can be executed by sending a PIPE_CONTROL command with CS - * stall bit set and a post sync operation. - * - * We've already done such a flush at the start of state upload, so we - * don't need to do another one here. - */ - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) { - if (stage_state->push_const_size != 0) { - cvs.Buffer0Valid = true; - cvs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset; - cvs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1; - } - } -#endif - - if (GFX_VER == 7 && devinfo->platform == INTEL_PLATFORM_IVB) - gfx7_emit_vs_workaround_flush(brw); - -#if GFX_VER >= 6 - brw_batch_emit(brw, GENX(3DSTATE_VS), vs) { -#else - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) { -#endif - INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); - - vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; - -#if GFX_VER < 6 - vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1; - vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; - vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; - - vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GFX_VER == 5 ? 
2 : 0); - vs.URBEntryAllocationSize = brw->urb.vsize - 1; - - vs.MaximumNumberofThreads = - CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1; - - vs.StatisticsEnable = false; - vs.SamplerStatePointer = - ro_bo(brw->batch.state.bo, stage_state->sampler_offset); -#endif - -#if GFX_VER == 5 - /* Force single program flow on Ironlake. We cannot reliably get - * all applications working without it. See: - * https://bugs.freedesktop.org/show_bug.cgi?id=29172 - * - * The most notable and reliably failing application is the Humus - * demo "CelShading" - */ - vs.SingleProgramFlow = true; - vs.SamplerCount = 0; /* hardware requirement */ -#endif - -#if GFX_VER >= 8 - vs.SIMD8DispatchEnable = - vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; - - vs.UserClipDistanceCullTestEnableBitmask = - vue_prog_data->cull_distance_mask; -#endif - } - -#if GFX_VER == 6 - /* Based on my reading of the simulator, the VS constants don't get - * pulled into the VS FF unit until an appropriate pipeline flush - * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds - * references to them into a little FIFO. The flushes are common, - * but don't reliably happen between this and a 3DPRIMITIVE, causing - * the primitive to use the wrong constants. Then the FIFO - * containing the constant setup gets added to again on the next - * constants change, and eventually when a flush does happen the - * unit is overwhelmed by constant changes and dies. - * - * To avoid this, send a PIPE_CONTROL down the line that will - * update the unit immediately loading the constants. The flush - * type bits here were those set by the STATE_BASE_ADDRESS whose - * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the - * bug reports that led to this workaround, and may be more than - * what is strictly required to avoid the issue. - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE); -#endif -} - -static const struct brw_tracked_state genX(vs_state) = { - .dirty = { - .mesa = (GFX_VER == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_VS_PROG_DATA | - (GFX_VER == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) | - (GFX_VER <= 5 ? 
BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_URB_FENCE - : 0), - }, - .emit = genX(upload_vs_state), -}; - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_cc_viewport)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - struct GENX(CC_VIEWPORT) ccv; - uint32_t cc_vp_offset; - uint32_t *cc_map = - brw_state_batch(brw, 4 * GENX(CC_VIEWPORT_length) * viewport_count, - 32, &cc_vp_offset); - - for (unsigned i = 0; i < viewport_count; i++) { - /* _NEW_VIEWPORT | _NEW_TRANSFORM */ - const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i]; - if (ctx->Transform.DepthClampNear && ctx->Transform.DepthClampFar) { - ccv.MinimumDepth = MIN2(vp->Near, vp->Far); - ccv.MaximumDepth = MAX2(vp->Near, vp->Far); - } else if (ctx->Transform.DepthClampNear) { - ccv.MinimumDepth = MIN2(vp->Near, vp->Far); - ccv.MaximumDepth = 0.0; - } else if (ctx->Transform.DepthClampFar) { - ccv.MinimumDepth = 0.0; - ccv.MaximumDepth = MAX2(vp->Near, vp->Far); - } else { - ccv.MinimumDepth = 0.0; - ccv.MaximumDepth = 1.0; - } - GENX(CC_VIEWPORT_pack)(NULL, cc_map, &ccv); - cc_map += GENX(CC_VIEWPORT_length); - } - -#if GFX_VER >= 7 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { - ptr.CCViewportPointer = cc_vp_offset; - } -#elif GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { - vp.CCViewportStateChange = 1; - vp.PointertoCC_VIEWPORT = cc_vp_offset; - } -#else - brw->cc.vp_offset = cc_vp_offset; - ctx->NewDriverState |= BRW_NEW_CC_VP; -#endif -} - -const struct brw_tracked_state genX(cc_vp) = { - .dirty = { - .mesa = _NEW_TRANSFORM | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_cc_viewport) -}; - -/* ---------------------------------------------------------------------- */ - -static void -set_scissor_bits(const struct gl_context *ctx, int i, - bool flip_y, unsigned fb_width, unsigned fb_height, - struct GENX(SCISSOR_RECT) *sc) -{ - int bbox[4]; - - bbox[0] = MAX2(ctx->ViewportArray[i].X, 0); - bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width); - bbox[2] = CLAMP(ctx->ViewportArray[i].Y, 0, fb_height); - bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height); - _mesa_intersect_scissor_bounding_box(ctx, i, bbox); - - if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) { - /* If the scissor was out of bounds and got clamped to 0 width/height - * at the bounds, the subtraction of 1 from maximums could produce a - * negative number and thus not clip anything. Instead, just provide - * a min > max scissor inside the bounds, which produces the expected - * no rendering. 
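The min > max trick described above relies only on the hardware scissor test
being inclusive at both ends: no integer pixel coordinate satisfies x >= 1
and x <= 0 at once, so the degenerate rectangle rejects everything. A minimal
sketch of that predicate, assuming inclusive-bounds semantics (the helper
name is made up for illustration):

   #include <stdbool.h>

   /* Inclusive scissor test as the hardware applies it: a pixel passes
    * only if it lies inside [xmin, xmax] x [ymin, ymax].  With xmin = 1,
    * xmax = 0 this is false for every (x, y), which is the "no rendering"
    * result the comment above wants.
    */
   static bool scissor_accepts(int x, int y,
                               int xmin, int xmax, int ymin, int ymax)
   {
      return x >= xmin && x <= xmax && y >= ymin && y <= ymax;
   }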
- */
-      sc->ScissorRectangleXMin = 1;
-      sc->ScissorRectangleXMax = 0;
-      sc->ScissorRectangleYMin = 1;
-      sc->ScissorRectangleYMax = 0;
-   } else if (!flip_y) {
-      /* texture memory: Y=0=bottom */
-      sc->ScissorRectangleXMin = bbox[0];
-      sc->ScissorRectangleXMax = bbox[1] - 1;
-      sc->ScissorRectangleYMin = bbox[2];
-      sc->ScissorRectangleYMax = bbox[3] - 1;
-   } else {
-      /* memory: Y=0=top */
-      sc->ScissorRectangleXMin = bbox[0];
-      sc->ScissorRectangleXMax = bbox[1] - 1;
-      sc->ScissorRectangleYMin = fb_height - bbox[3];
-      sc->ScissorRectangleYMax = fb_height - bbox[2] - 1;
-   }
-}
-
-#if GFX_VER >= 6
-static void
-genX(upload_scissor_state)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   const bool flip_y = ctx->DrawBuffer->FlipY;
-   struct GENX(SCISSOR_RECT) scissor;
-   uint32_t scissor_state_offset;
-   const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
-   const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
-   uint32_t *scissor_map;
-
-   /* BRW_NEW_VIEWPORT_COUNT */
-   const unsigned viewport_count = brw->clip.viewport_count;
-   /* Wa_1409725701:
-    * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
-    * stored as an array of up to 16 elements. The location of first
-    * element of the array, as specified by Pointer to SCISSOR_RECT, should
-    * be aligned to a 64-byte boundary."
-    */
-   const unsigned alignment = 64;
-   scissor_map = brw_state_batch(
-      brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count,
-      alignment, &scissor_state_offset);
-
-   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
-
-   /* The scissor only needs to handle the intersection of drawable and
-    * scissor rect. Clipping to the boundaries of static shared buffers
-    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
-    *
-    * Note that the hardware's coordinates are inclusive, while Mesa's min is
-    * inclusive but max is exclusive.
- */ - for (unsigned i = 0; i < viewport_count; i++) { - set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, &scissor); - GENX(SCISSOR_RECT_pack)( - NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor); - } - - brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { - ptr.ScissorRectPointer = scissor_state_offset; - } -} - -static const struct brw_tracked_state genX(scissor_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_SCISSOR | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_scissor_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_sf_clip_viewport)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - float y_scale, y_bias; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - /* _NEW_BUFFERS */ - const bool flip_y = ctx->DrawBuffer->FlipY; - const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer); - const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); - -#if GFX_VER >= 7 -#define clv sfv - struct GENX(SF_CLIP_VIEWPORT) sfv; - uint32_t sf_clip_vp_offset; - uint32_t *sf_clip_map = - brw_state_batch(brw, GENX(SF_CLIP_VIEWPORT_length) * 4 * viewport_count, - 64, &sf_clip_vp_offset); -#else - struct GENX(SF_VIEWPORT) sfv; - struct GENX(CLIP_VIEWPORT) clv; - uint32_t sf_vp_offset, clip_vp_offset; - uint32_t *sf_map = - brw_state_batch(brw, GENX(SF_VIEWPORT_length) * 4 * viewport_count, - 32, &sf_vp_offset); - uint32_t *clip_map = - brw_state_batch(brw, GENX(CLIP_VIEWPORT_length) * 4 * viewport_count, - 32, &clip_vp_offset); -#endif - - /* _NEW_BUFFERS */ - if (flip_y) { - y_scale = -1.0; - y_bias = (float)fb_height; - } else { - y_scale = 1.0; - y_bias = 0; - } - - for (unsigned i = 0; i < brw->clip.viewport_count; i++) { - /* _NEW_VIEWPORT: Guardband Clipping */ - float scale[3], translate[3], gb_xmin, gb_xmax, gb_ymin, gb_ymax; - _mesa_get_viewport_xform(ctx, i, scale, translate); - - sfv.ViewportMatrixElementm00 = scale[0]; - sfv.ViewportMatrixElementm11 = scale[1] * y_scale, - sfv.ViewportMatrixElementm22 = scale[2], - sfv.ViewportMatrixElementm30 = translate[0], - sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias, - sfv.ViewportMatrixElementm32 = translate[2], - intel_calculate_guardband_size(fb_width, fb_height, - sfv.ViewportMatrixElementm00, - sfv.ViewportMatrixElementm11, - sfv.ViewportMatrixElementm30, - sfv.ViewportMatrixElementm31, - &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); - - - clv.XMinClipGuardband = gb_xmin; - clv.XMaxClipGuardband = gb_xmax; - clv.YMinClipGuardband = gb_ymin; - clv.YMaxClipGuardband = gb_ymax; - -#if GFX_VER < 6 - set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, - &sfv.ScissorRectangle); -#elif GFX_VER >= 8 - /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport - * The hardware will take the intersection of the drawing rectangle, - * scissor rectangle, and the viewport extents. However, emitting - * 3DSTATE_DRAWING_RECTANGLE is expensive since it requires a full - * pipeline stall so we're better off just being a little more clever - * with our viewport so we can emit it once at context creation time. 
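For the y_scale/y_bias pair chosen earlier in this hunk, a worked number may
help: with flip_y and fb_height = 1080, the viewport matrix element m31
becomes translate[1] * -1 + 1080, so the bottom edge of GL window space
(y = 0) maps to 1080 and the top edge (y = 1080) maps to 0. A tiny sketch of
the flip, with an illustrative (non-driver) function name:

   /* Window-system Y flip applied by the viewport transform: GL is
    * bottom-up, winsys memory is top-down, so y' = fb_height - y.
    */
   static float flip_window_y(float gl_y, float fb_height)
   {
      const float y_scale = -1.0f;    /* mirrors y_scale above */
      const float y_bias = fb_height; /* mirrors y_bias above */
      return gl_y * y_scale + y_bias;
   }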
- */ - const float viewport_Xmin = MAX2(ctx->ViewportArray[i].X, 0); - const float viewport_Ymin = MAX2(ctx->ViewportArray[i].Y, 0); - const float viewport_Xmax = - MIN2(ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width, fb_width); - const float viewport_Ymax = - MIN2(ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height, fb_height); - - if (flip_y) { - sfv.XMinViewPort = viewport_Xmin; - sfv.XMaxViewPort = viewport_Xmax - 1; - sfv.YMinViewPort = fb_height - viewport_Ymax; - sfv.YMaxViewPort = fb_height - viewport_Ymin - 1; - } else { - sfv.XMinViewPort = viewport_Xmin; - sfv.XMaxViewPort = viewport_Xmax - 1; - sfv.YMinViewPort = viewport_Ymin; - sfv.YMaxViewPort = viewport_Ymax - 1; - } -#endif - -#if GFX_VER >= 7 - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv); - sf_clip_map += GENX(SF_CLIP_VIEWPORT_length); -#else - GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv); - GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv); - sf_map += GENX(SF_VIEWPORT_length); - clip_map += GENX(CLIP_VIEWPORT_length); -#endif - } - -#if GFX_VER >= 7 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { - ptr.SFClipViewportPointer = sf_clip_vp_offset; - } -#elif GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { - vp.SFViewportStateChange = 1; - vp.CLIPViewportStateChange = 1; - vp.PointertoCLIP_VIEWPORT = clip_vp_offset; - vp.PointertoSF_VIEWPORT = sf_vp_offset; - } -#else - brw->sf.vp_offset = sf_vp_offset; - brw->clip.vp_offset = clip_vp_offset; - brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP; -#endif -} - -static const struct brw_tracked_state genX(sf_clip_viewport) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_VIEWPORT | - (GFX_VER <= 5 ? _NEW_SCISSOR : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_sf_clip_viewport), -}; - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_gs_state)(struct brw_context *brw) -{ - UNUSED struct gl_context *ctx = &brw->ctx; - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->gs.base; - const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY]; - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool active = GFX_VER >= 6 && gs_prog; - - /* BRW_NEW_GS_PROG_DATA */ - struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; - UNUSED const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_prog_data); -#if GFX_VER >= 7 - const struct brw_gs_prog_data *gs_prog_data = - brw_gs_prog_data(stage_prog_data); -#endif - -#if GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) { - if (active && stage_state->push_const_size != 0) { - cgs.Buffer0Valid = true; - cgs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset; - cgs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1; - } - } -#endif - -#if GFX_VERx10 == 70 - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. 
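The flush demanded by the IVB:GT2 note above only has to happen when the GS
enable actually changes value, which is why the driver tracks
brw->gs.enabled and compares it against the new state. A standalone sketch
of that guard (hypothetical helper; the real flush is
gfx7_emit_cs_stall_flush(), a PIPE_CONTROL with the CS Stall bit plus the
post-sync write that IVB generally requires alongside a lone CS stall):

   #include <stdbool.h>

   /* The whole-pipeline flush is needed only on enable-bit transitions,
    * not on every 3DSTATE_GS emit.
    */
   static bool gs_toggle_needs_flush(bool was_enabled, bool now_enabled)
   {
      return was_enabled != now_enabled;
   }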
- */
-   if (devinfo->gt == 2 && brw->gs.enabled != active)
-      gfx7_emit_cs_stall_flush(brw);
-#endif
-
-#if GFX_VER >= 6
-   brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
-#else
-   ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
-   brw_state_emit(brw, GENX(GS_STATE), 32, &brw->ff_gs.state_offset, gs) {
-#endif
-
-#if GFX_VER >= 6
-      if (active) {
-         INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
-
-#if GFX_VER >= 7
-         gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
-         gs.OutputTopology = gs_prog_data->output_topology;
-         gs.ControlDataHeaderSize =
-            gs_prog_data->control_data_header_size_hwords;
-
-         gs.InstanceControl = gs_prog_data->invocations - 1;
-         gs.DispatchMode = vue_prog_data->dispatch_mode;
-
-         gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
-
-         gs.ControlDataFormat = gs_prog_data->control_data_format;
-#endif
-
-         /* Note: the meaning of the GFX7_GS_REORDER_TRAILING bit changes between
-          * Ivy Bridge and Haswell.
-          *
-          * On Ivy Bridge, setting this bit causes the vertices of a triangle
-          * strip to be delivered to the geometry shader in an order that does
-          * not strictly follow the OpenGL spec, but preserves triangle
-          * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then
-          * the geometry shader sees triangles:
-          *
-          * (1, 2, 3), (2, 4, 3), (3, 4, 5)
-          *
-          * (Clearing the bit is even worse, because it fails to preserve
-          * orientation).
-          *
-          * Triangle strips with adjacency are always ordered in a way that
-          * preserves triangle orientation but does not strictly follow the
-          * OpenGL spec, regardless of the setting of this bit.
-          *
-          * On Haswell, both triangle strips and triangle strips with adjacency
-          * are always ordered in a way that preserves triangle orientation.
-          * Setting this bit causes the ordering to strictly follow the OpenGL
-          * spec.
-          *
-          * So in either case we want to set the bit. Unfortunately on Ivy
-          * Bridge this will get the order close to correct but not perfect.
-          */
-         gs.ReorderMode = TRAILING;
-         gs.MaximumNumberofThreads =
-            GFX_VER == 8 ? (devinfo->max_gs_threads / 2 - 1)
-                         : (devinfo->max_gs_threads - 1);
-
-#if GFX_VER < 7
-         gs.SOStatisticsEnable = true;
-         if (gs_prog->info.has_transform_feedback_varyings)
-            gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx);
-
-         /* GFX6_GS_SPF_MODE and GFX6_GS_VECTOR_MASK_ENABLE are enabled, as
-          * was previously done for gfx6.
-          *
-          * TODO: test with both disabled to see if the HW is behaving
-          * as expected, like in gfx7.
-          */
-         gs.SingleProgramFlow = true;
-         gs.VectorMaskEnable = true;
-#endif
-
-#if GFX_VER >= 8
-         gs.ExpectedVertexCount = gs_prog_data->vertices_in;
-
-         if (gs_prog_data->static_vertex_count != -1) {
-            gs.StaticOutput = true;
-            gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
-         }
-         gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
-
-         gs.UserClipDistanceCullTestEnableBitmask =
-            vue_prog_data->cull_distance_mask;
-
-         const int urb_entry_write_offset = 1;
-         const uint32_t urb_entry_output_length =
-            DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
-            urb_entry_write_offset;
-
-         gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
-         gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
-#endif
-      }
-#endif
-
-#if GFX_VER <= 6
-      if (!active && brw->ff_gs.prog_active) {
-         /* In gfx6, transform feedback for the VS stage is done with an
-          * ad-hoc GS program. This function provides the needed 3DSTATE_GS
-          * for this.
- */ - gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset); - gs.SingleProgramFlow = true; - gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 2 : 1; - gs.VertexURBEntryReadLength = brw->ff_gs.prog_data->urb_read_length; - -#if GFX_VER <= 5 - gs.GRFRegisterCount = - DIV_ROUND_UP(brw->ff_gs.prog_data->total_grf, 16) - 1; - /* BRW_NEW_URB_FENCE */ - gs.NumberofURBEntries = brw->urb.nr_gs_entries; - gs.URBEntryAllocationSize = brw->urb.vsize - 1; - gs.MaximumNumberofThreads = brw->urb.nr_gs_entries >= 8 ? 1 : 0; - gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate; -#else - gs.Enable = true; - gs.VectorMaskEnable = true; - gs.SVBIPayloadEnable = true; - gs.SVBIPostIncrementEnable = true; - gs.SVBIPostIncrementValue = - brw->ff_gs.prog_data->svbi_postincrement_value; - gs.SOStatisticsEnable = true; - gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1; -#endif - } -#endif - if (!active && !brw->ff_gs.prog_active) { -#if GFX_VER < 8 - gs.DispatchGRFStartRegisterForURBData = 1; -#if GFX_VER >= 7 - gs.IncludeVertexHandles = true; -#endif -#endif - } - -#if GFX_VER >= 6 - gs.StatisticsEnable = true; -#endif -#if GFX_VER == 5 || GFX_VER == 6 - gs.RenderingEnabled = true; -#endif -#if GFX_VER <= 5 - gs.MaximumVPIndex = brw->clip.viewport_count - 1; -#endif - } - -#if GFX_VER == 6 - brw->gs.enabled = active; -#endif -} - -static const struct brw_tracked_state genX(gs_state) = { - .dirty = { - .mesa = (GFX_VER == 6 ? _NEW_PROGRAM_CONSTANTS : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - (GFX_VER <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_URB_FENCE | - BRW_NEW_VIEWPORT_COUNT - : 0) | - (GFX_VER >= 6 ? BRW_NEW_CONTEXT | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA - : 0) | - (GFX_VER < 7 ? BRW_NEW_FF_GS_PROG_DATA : 0), - }, - .emit = genX(upload_gs_state), -}; - -/* ---------------------------------------------------------------------- */ - -UNUSED static GLenum -fix_dual_blend_alpha_to_one(GLenum function) -{ - switch (function) { - case GL_SRC1_ALPHA: - return GL_ONE; - - case GL_ONE_MINUS_SRC1_ALPHA: - return GL_ZERO; - } - - return function; -} - -#define blend_factor(x) brw_translate_blend_factor(x) -#define blend_eqn(x) brw_translate_blend_equation(x) - -/** - * Modify blend function to force destination alpha to 1.0 - * - * If \c function specifies a blend function that uses destination alpha, - * replace it with a function that hard-wires destination alpha to 1.0. This - * is used when rendering to xRGB targets. - */ -static GLenum -brw_fix_xRGB_alpha(GLenum function) -{ - switch (function) { - case GL_DST_ALPHA: - return GL_ONE; - - case GL_ONE_MINUS_DST_ALPHA: - case GL_SRC_ALPHA_SATURATE: - return GL_ZERO; - } - - return function; -} - -#if GFX_VER >= 6 -typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML; -#else -typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML; -#endif - -UNUSED static bool -set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i, - bool alpha_to_one) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - - bool independent_alpha_blend = false; - - /* Used for implementing the following bit of GL_EXT_texture_integer: - * "Per-fragment operations that require floating-point color - * components, including multisample alpha operations, alpha test, - * blending, and dithering, have no effect when the corresponding - * colors are written to an integer color buffer." 
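To make brw_fix_xRGB_alpha() above concrete: on an xRGB target the missing
destination alpha reads back as 1.0, so any factor sampling it collapses to
a constant. A standalone restatement of the same mapping (illustrative
name, standard GL enums):

   #include <GL/gl.h>

   /* dst.a is implicitly 1.0, so DST_ALPHA becomes ONE, while
    * ONE_MINUS_DST_ALPHA and SRC_ALPHA_SATURATE (min(src.a, 1 - dst.a))
    * both become ZERO.  All other factors pass through untouched.
    */
   static GLenum fix_xrgb_factor(GLenum factor)
   {
      switch (factor) {
      case GL_DST_ALPHA:           return GL_ONE;
      case GL_ONE_MINUS_DST_ALPHA:
      case GL_SRC_ALPHA_SATURATE:  return GL_ZERO;
      default:                     return factor;
      }
   }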
- */ - const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i); - - const unsigned blend_enabled = GFX_VER >= 6 ? - ctx->Color.BlendEnabled & (1 << i) : ctx->Color.BlendEnabled; - - /* _NEW_COLOR */ - if (ctx->Color.ColorLogicOpEnabled) { - GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format) - : GL_UNSIGNED_NORMALIZED; - WARN_ONCE(ctx->Color.LogicOp != GL_COPY && - rb_type != GL_UNSIGNED_NORMALIZED && - rb_type != GL_FLOAT, "Ignoring %s logic op on %s " - "renderbuffer\n", - _mesa_enum_to_string(ctx->Color.LogicOp), - _mesa_enum_to_string(rb_type)); - if (GFX_VER >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) { - entry->LogicOpEnable = true; - entry->LogicOpFunction = ctx->Color._LogicOp; - } - } else if (blend_enabled && - ctx->Color._AdvancedBlendMode == BLEND_NONE - && (GFX_VER <= 5 || !integer)) { - GLenum eqRGB = ctx->Color.Blend[i].EquationRGB; - GLenum eqA = ctx->Color.Blend[i].EquationA; - GLenum srcRGB = ctx->Color.Blend[i].SrcRGB; - GLenum dstRGB = ctx->Color.Blend[i].DstRGB; - GLenum srcA = ctx->Color.Blend[i].SrcA; - GLenum dstA = ctx->Color.Blend[i].DstA; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) - srcRGB = dstRGB = GL_ONE; - - if (eqA == GL_MIN || eqA == GL_MAX) - srcA = dstA = GL_ONE; - - /* Due to hardware limitations, the destination may have information - * in an alpha channel even when the format specifies no alpha - * channel. In order to avoid getting any incorrect blending due to - * that alpha channel, coerce the blend factors to values that will - * not read the alpha channel, but will instead use the correct - * implicit value for alpha. - */ - if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, - GL_TEXTURE_ALPHA_TYPE)) { - srcRGB = brw_fix_xRGB_alpha(srcRGB); - srcA = brw_fix_xRGB_alpha(srcA); - dstRGB = brw_fix_xRGB_alpha(dstRGB); - dstA = brw_fix_xRGB_alpha(dstA); - } - - /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable): - * "If Dual Source Blending is enabled, this bit must be disabled." - * - * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO, - * and leave it enabled anyway. - */ - if (GFX_VER >= 6 && ctx->Color._BlendUsesDualSrc & (1 << i) && alpha_to_one) { - srcRGB = fix_dual_blend_alpha_to_one(srcRGB); - srcA = fix_dual_blend_alpha_to_one(srcA); - dstRGB = fix_dual_blend_alpha_to_one(dstRGB); - dstA = fix_dual_blend_alpha_to_one(dstA); - } - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - /* The Dual Source Blending documentation says: - * - * "If SRC1 is included in a src/dst blend factor and - * a DualSource RT Write message is not used, results - * are UNDEFINED. (This reflects the same restriction in DX APIs, - * where undefined results are produced if “o1” is not written - * by a PS – there are no default values defined). - * If SRC1 is not included in a src/dst blend factor, - * dual source blending must be disabled." - * - * There is no way to gracefully fix this undefined situation - * so we just disable the blending to prevent possible issues. 
- */ - entry->ColorBufferBlendEnable = - !(ctx->Color._BlendUsesDualSrc & 0x1) || wm_prog_data->dual_src_blend; - - entry->DestinationBlendFactor = blend_factor(dstRGB); - entry->SourceBlendFactor = blend_factor(srcRGB); - entry->DestinationAlphaBlendFactor = blend_factor(dstA); - entry->SourceAlphaBlendFactor = blend_factor(srcA); - entry->ColorBlendFunction = blend_eqn(eqRGB); - entry->AlphaBlendFunction = blend_eqn(eqA); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) - independent_alpha_blend = true; - } - - return independent_alpha_blend; -} - -#if GFX_VER >= 6 -static void -genX(upload_blend_state)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - int size; - - /* We need at least one BLEND_STATE written, because we might do - * thread dispatch even if _NumColorDrawBuffers is 0 (for example - * for computed depth or alpha test), which will do an FB write - * with render target 0, which will reference BLEND_STATE[0] for - * alpha test enable. - */ - int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; - if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled) - nr_draw_buffers = 1; - - size = GENX(BLEND_STATE_ENTRY_length) * 4 * nr_draw_buffers; -#if GFX_VER >= 8 - size += GENX(BLEND_STATE_length) * 4; -#endif - - uint32_t *blend_map; - blend_map = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); - -#if GFX_VER >= 8 - struct GENX(BLEND_STATE) blend = { 0 }; - { -#else - for (int i = 0; i < nr_draw_buffers; i++) { - struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; -#define blend entry -#endif - /* OpenGL specification 3.3 (page 196), section 4.1.3 says: - * "If drawbuffer zero is not NONE and the buffer it references has an - * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE - * operations are skipped." - */ - if (!(ctx->DrawBuffer->_IntegerBuffers & 0x1)) { - /* _NEW_MULTISAMPLE */ - if (_mesa_is_multisample_enabled(ctx)) { - if (ctx->Multisample.SampleAlphaToCoverage) { - blend.AlphaToCoverageEnable = true; - blend.AlphaToCoverageDitherEnable = GFX_VER >= 7; - } - if (ctx->Multisample.SampleAlphaToOne) - blend.AlphaToOneEnable = true; - } - - /* _NEW_COLOR */ - if (ctx->Color.AlphaEnabled) { - blend.AlphaTestEnable = true; - blend.AlphaTestFunction = - brw_translate_compare_func(ctx->Color.AlphaFunc); - } - - if (ctx->Color.DitherFlag) { - blend.ColorDitherEnable = true; - } - } - -#if GFX_VER >= 8 - for (int i = 0; i < nr_draw_buffers; i++) { - struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; -#else - { -#endif - blend.IndependentAlphaBlendEnable = - set_blend_entry_bits(brw, &entry, i, blend.AlphaToOneEnable) || - blend.IndependentAlphaBlendEnable; - - /* See section 8.1.6 "Pre-Blend Color Clamping" of the - * SandyBridge PRM Volume 2 Part 1 for HW requirements. - * - * We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR - * clamping in the fragment shader. For its clamping of - * blending, the spec says: - * - * "RESOLVED: For fixed-point color buffers, the inputs and - * the result of the blending equation are clamped. For - * floating-point color buffers, no clamping occurs." - * - * So, generally, we want clamping to the render target's range. - * And, good news, the hardware tables for both pre- and - * post-blend color clamping are either ignored, or any are - * allowed, or clamping is required but RT range clamping is a - * valid option. 
- */ - entry.PreBlendColorClampEnable = true; - entry.PostBlendColorClampEnable = true; - entry.ColorClampRange = COLORCLAMP_RTFORMAT; - - entry.WriteDisableRed = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 0); - entry.WriteDisableGreen = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 1); - entry.WriteDisableBlue = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 2); - entry.WriteDisableAlpha = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 3); - -#if GFX_VER >= 8 - GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry); -#else - GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry); -#endif - } - } - -#if GFX_VER >= 8 - GENX(BLEND_STATE_pack)(NULL, blend_map, &blend); -#endif - -#if GFX_VER < 7 - brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { - ptr.PointertoBLEND_STATE = brw->cc.blend_state_offset; - ptr.BLEND_STATEChange = true; - } -#else - brw_batch_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) { - ptr.BlendStatePointer = brw->cc.blend_state_offset; -#if GFX_VER >= 8 - ptr.BlendStatePointerValid = true; -#endif - } -#endif -} - -UNUSED static const struct brw_tracked_state genX(blend_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = genX(upload_blend_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -UNUSED static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, -}; - -static void -genX(upload_push_constant_packets)(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL); - - struct brw_stage_state *stage_states[] = { - &brw->vs.base, - &brw->tcs.base, - &brw->tes.base, - &brw->gs.base, - &brw->wm.base, - }; - - - if (GFX_VERx10 == 70 && - devinfo->platform == INTEL_PLATFORM_IVB && - stage_states[MESA_SHADER_VERTEX]->push_constants_dirty) - gfx7_emit_vs_workaround_flush(brw); - - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - struct brw_stage_state *stage_state = stage_states[stage]; - UNUSED struct gl_program *prog = ctx->_Shader->CurrentProgram[stage]; - - if (!stage_state->push_constants_dirty) - continue; - - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) { - pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; -#if GFX_VER >= 9 - pkt.MOCS = mocs; -#elif GFX_VER < 8 - /* MOCS is MBZ on Gfx8 so we skip it there */ - pkt.ConstantBody.MOCS = mocs; -#endif - if (stage_state->prog_data) { -#if GFX_VERx10 >= 75 - /* The Skylake PRM contains the following restriction: - * - * "The driver must ensure The following case does not occur - * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with - * buffer 3 read length equal to zero committed followed by a - * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to - * zero committed." - * - * To avoid this, we program the buffers in the highest slots. - * This way, slot 0 is only used if slot 3 is also used. 
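As an illustration of the highest-slots-first rule just quoted: the loop
below walks ubo_ranges from index 3 downward, so with two live UBO ranges
plus a push-constant segment, ReadLength[3] and ReadLength[2] carry the
UBOs, ReadLength[1] carries the push constants, and slot 0 stays empty,
satisfying the PRM restriction. A minimal sketch of just the slot ordering
(made-up helper; buffer addresses omitted, read_len assumed zeroed by the
caller):

   /* Fill constant-buffer read lengths from slot 3 downward so slot 0
    * is used only when all four slots are needed.
    */
   static void pack_highest_slots_first(const unsigned range_len[4],
                                        unsigned push_len,
                                        unsigned read_len[4])
   {
      int slot = 3;
      for (int i = 3; i >= 0; i--) {
         if (range_len[i] == 0)
            continue;               /* skip empty ranges, as above */
         read_len[slot--] = range_len[i];
      }
      if (push_len > 0 && slot >= 0)
         read_len[slot] = push_len; /* push constants take the lowest
                                     * slot actually used */
   }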
- */ - int n = 3; - - for (int i = 3; i >= 0; i--) { - const struct brw_ubo_range *range = - &stage_state->prog_data->ubo_ranges[i]; - - if (range->length == 0) - continue; - - const struct gl_uniform_block *block = - prog->sh.UniformBlocks[range->block]; - const struct gl_buffer_binding *binding = - &ctx->UniformBufferBindings[block->Binding]; - - if (!binding->BufferObject) { - static unsigned msg_id = 0; - _mesa_gl_debugf(ctx, &msg_id, MESA_DEBUG_SOURCE_API, - MESA_DEBUG_TYPE_UNDEFINED, - MESA_DEBUG_SEVERITY_HIGH, - "UBO %d unbound, %s shader uniform data " - "will be undefined.", - range->block, - _mesa_shader_stage_to_string(stage)); - continue; - } - - assert(binding->Offset % 32 == 0); - - struct brw_bo *bo = brw_bufferobj_buffer(brw, - brw_buffer_object(binding->BufferObject), - binding->Offset, range->length * 32, false); - - pkt.ConstantBody.ReadLength[n] = range->length; - pkt.ConstantBody.Buffer[n] = - ro_bo(bo, range->start * 32 + binding->Offset); - n--; - } - - if (stage_state->push_const_size > 0) { - assert(n >= 0); - pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size; - pkt.ConstantBody.Buffer[n] = - ro_bo(stage_state->push_const_bo, - stage_state->push_const_offset); - } -#else - pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size; - pkt.ConstantBody.Buffer[0].offset = stage_state->push_const_offset; -#endif - } - } - - stage_state->push_constants_dirty = false; - brw->ctx.NewDriverState |= GFX_VER >= 9 ? BRW_NEW_SURFACES : 0; - } -} - -const struct brw_tracked_state genX(push_constant_packets) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_DRAW_CALL, - }, - .emit = genX(upload_push_constant_packets), -}; -#endif - -#if GFX_VER >= 6 -static void -genX(upload_vs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->vs.base; - - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; - - gfx6_upload_push_constants(brw, vp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(vs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS | - _NEW_TRANSFORM, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = genX(upload_vs_push_constants), -}; - -static void -genX(upload_gs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->gs.base; - - /* BRW_NEW_GEOMETRY_PROGRAM */ - const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY]; - - /* BRW_NEW_GS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; - - gfx6_upload_push_constants(brw, gp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(gs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS | - _NEW_TRANSFORM, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA, - }, - .emit = genX(upload_gs_push_constants), -}; - -static void -genX(upload_wm_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->wm.base; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; - - gfx6_upload_push_constants(brw, fp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(wm_push_constants) = { - .dirty = { - .mesa = 
_NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA, - }, - .emit = genX(upload_wm_push_constants), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 6 -static unsigned -genX(determine_sample_mask)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - float coverage = 1.0f; - float coverage_invert = false; - unsigned sample_mask = ~0u; - - /* BRW_NEW_NUM_SAMPLES */ - unsigned num_samples = brw->num_samples; - - if (_mesa_is_multisample_enabled(ctx)) { - if (ctx->Multisample.SampleCoverage) { - coverage = ctx->Multisample.SampleCoverageValue; - coverage_invert = ctx->Multisample.SampleCoverageInvert; - } - if (ctx->Multisample.SampleMask) { - sample_mask = ctx->Multisample.SampleMaskValue; - } - } - - if (num_samples > 1) { - int coverage_int = (int) (num_samples * coverage + 0.5f); - uint32_t coverage_bits = (1 << coverage_int) - 1; - if (coverage_invert) - coverage_bits ^= (1 << num_samples) - 1; - return coverage_bits & sample_mask; - } else { - return 1; - } -} - -static void -genX(emit_3dstate_multisample2)(struct brw_context *brw, - unsigned num_samples) -{ - unsigned log2_samples = ffs(num_samples) - 1; - - brw_batch_emit(brw, GENX(3DSTATE_MULTISAMPLE), multi) { - multi.PixelLocation = CENTER; - multi.NumberofMultisamples = log2_samples; -#if GFX_VER == 6 - INTEL_SAMPLE_POS_4X(multi.Sample); -#elif GFX_VER == 7 - switch (num_samples) { - case 1: - INTEL_SAMPLE_POS_1X(multi.Sample); - break; - case 2: - INTEL_SAMPLE_POS_2X(multi.Sample); - break; - case 4: - INTEL_SAMPLE_POS_4X(multi.Sample); - break; - case 8: - INTEL_SAMPLE_POS_8X(multi.Sample); - break; - default: - break; - } -#endif - } -} - -static void -genX(upload_multisample_state)(struct brw_context *brw) -{ - assert(brw->num_samples > 0 && brw->num_samples <= 16); - - genX(emit_3dstate_multisample2)(brw, brw->num_samples); - - brw_batch_emit(brw, GENX(3DSTATE_SAMPLE_MASK), sm) { - sm.SampleMask = genX(determine_sample_mask)(brw); - } -} - -static const struct brw_tracked_state genX(multisample_state) = { - .dirty = { - .mesa = _NEW_MULTISAMPLE | - (GFX_VER == 10 ? 
_NEW_BUFFERS : 0),
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_NUM_SAMPLES,
-   },
-   .emit = genX(upload_multisample_state)
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_color_calc_state)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) {
-#if GFX_VER <= 5
-      cc.IndependentAlphaBlendEnable =
-         set_blend_entry_bits(brw, &cc, 0, false);
-      set_depth_stencil_bits(brw, &cc);
-
-      if (ctx->Color.AlphaEnabled &&
-          ctx->DrawBuffer->_NumColorDrawBuffers <= 1) {
-         cc.AlphaTestEnable = true;
-         cc.AlphaTestFunction =
-            brw_translate_compare_func(ctx->Color.AlphaFunc);
-      }
-
-      cc.ColorDitherEnable = ctx->Color.DitherFlag;
-
-      cc.StatisticsEnable = brw->stats_wm;
-
-      cc.CCViewportStatePointer =
-         ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
-#else
-      /* _NEW_COLOR */
-      cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
-      cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
-      cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
-      cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
-
-#if GFX_VER < 9
-      /* _NEW_STENCIL */
-      cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
-      cc.BackfaceStencilReferenceValue =
-         _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
-#endif
-
-#endif
-
-      /* _NEW_COLOR */
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
-                               ctx->Color.AlphaRef);
-   }
-
-#if GFX_VER >= 6
-   brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
-      ptr.ColorCalcStatePointer = brw->cc.state_offset;
-#if GFX_VER != 7
-      ptr.ColorCalcStatePointerValid = true;
-#endif
-   }
-#else
-   brw->ctx.NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
-#endif
-}
-
-UNUSED static const struct brw_tracked_state genX(color_calc_state) = {
-   .dirty = {
-      .mesa = _NEW_COLOR |
-              _NEW_STENCIL |
-              (GFX_VER <= 5 ? _NEW_BUFFERS |
-                              _NEW_DEPTH
-                            : 0),
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             (GFX_VER <= 5 ? BRW_NEW_CC_VP |
-                             BRW_NEW_STATS_WM
-                           : BRW_NEW_CC_STATE |
-                             BRW_NEW_STATE_BASE_ADDRESS),
-   },
-   .emit = genX(upload_color_calc_state),
-};
-
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VERx10 == 75
-static void
-genX(upload_color_calc_and_blend_state)(struct brw_context *brw)
-{
-   genX(upload_blend_state)(brw);
-   genX(upload_color_calc_state)(brw);
-}
-
-/* On Haswell, when BLEND_STATE is emitted, CC_STATE should also be
- * re-emitted; this works around the flickering shadows seen in several
- * games.
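Running genX(determine_sample_mask)() from a bit earlier in this hunk
through a concrete case: on an 8-sample framebuffer,
glSampleCoverage(0.5, GL_TRUE) yields coverage_int = 4, coverage_bits =
0x0F, and the invert flips that to 0xF0 within the valid samples before it
is ANDed with any glSampleMaski() mask. The same arithmetic as a standalone
sketch (illustrative function name):

   #include <stdbool.h>
   #include <stdint.h>

   /* Coverage -> sample-mask math for num_samples > 1: round coverage
    * to a sample count, build that many low bits, optionally invert
    * within the valid samples, then apply the API-level sample mask.
    */
   static uint32_t coverage_to_sample_mask(unsigned num_samples,
                                           float coverage, bool invert,
                                           uint32_t api_mask)
   {
      int coverage_int = (int) (num_samples * coverage + 0.5f);
      uint32_t bits = (1u << coverage_int) - 1;
      if (invert)
         bits ^= (1u << num_samples) - 1;
      return bits & api_mask;
   }

   /* coverage_to_sample_mask(8, 0.5f, true, ~0u) == 0xF0 */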
- */ -static const struct brw_tracked_state genX(cc_and_blend_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_STENCIL | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CC_STATE | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = genX(upload_color_calc_and_blend_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -static void -genX(upload_sbe)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); -#if GFX_VER >= 8 - struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } }; -#else -#define attr_overrides sbe.Attribute -#endif - uint32_t urb_entry_read_length; - uint32_t urb_entry_read_offset; - uint32_t point_sprite_enables; - - brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) { - sbe.AttributeSwizzleEnable = true; - sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; - - /* _NEW_BUFFERS */ - bool flip_y = ctx->DrawBuffer->FlipY; - - /* _NEW_POINT - * - * Window coordinates in an FBO are inverted, which means point - * sprite origin must be inverted. - */ - if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) - sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT; - else - sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT; - - /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM, - * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | - * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA | - * BRW_NEW_VUE_MAP_GEOM_OUT - */ - genX(calculate_attr_overrides)(brw, - attr_overrides, - &point_sprite_enables, - &urb_entry_read_length, - &urb_entry_read_offset); - - /* Typically, the URB entry read length and offset should be programmed - * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active - * stage which produces geometry. However, we don't know the proper - * value until we call calculate_attr_overrides(). - * - * To fit with our existing code, we override the inherited values and - * specify it here directly, as we did on previous generations. - */ - sbe.VertexURBEntryReadLength = urb_entry_read_length; - sbe.VertexURBEntryReadOffset = urb_entry_read_offset; - sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables; - sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; - -#if GFX_VER >= 8 - sbe.ForceVertexURBEntryReadLength = true; - sbe.ForceVertexURBEntryReadOffset = true; -#endif - -#if GFX_VER >= 9 - /* prepare the active component dwords */ - for (int i = 0; i < 32; i++) - sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; -#endif - } - -#if GFX_VER >= 8 - brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) { - for (int i = 0; i < 16; i++) - sbes.Attribute[i] = attr_overrides[i]; - } -#endif - -#undef attr_overrides -} - -static const struct brw_tracked_state genX(sbe_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_PROGRAM, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_VUE_MAP_GEOM_OUT | - (GFX_VER == 7 ? 
BRW_NEW_PRIMITIVE - : 0), - }, - .emit = genX(upload_sbe), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -/** - * Outputs the 3DSTATE_SO_DECL_LIST command. - * - * The data output is a series of 64-bit entries containing a SO_DECL per - * stream. We only have one stream of rendering coming out of the GS unit, so - * we only emit stream 0 (low 16 bits) SO_DECLs. - */ -static void -genX(upload_3dstate_so_decl_list)(struct brw_context *brw, - const struct brw_vue_map *vue_map) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_TRANSFORM_FEEDBACK */ - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; - const struct gl_transform_feedback_info *linked_xfb_info = - xfb_obj->program->sh.LinkedTransformFeedback; - struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128]; - int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; - int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; - int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; - int max_decls = 0; - STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS); - - memset(so_decl, 0, sizeof(so_decl)); - - /* Construct the list of SO_DECLs to be emitted. The formatting of the - * command feels strange -- each dword pair contains a SO_DECL per stream. - */ - for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) { - const struct gl_transform_feedback_output *output = - &linked_xfb_info->Outputs[i]; - const int buffer = output->OutputBuffer; - const int varying = output->OutputRegister; - const unsigned stream_id = output->StreamId; - assert(stream_id < MAX_VERTEX_STREAMS); - - buffer_mask[stream_id] |= 1 << buffer; - - assert(vue_map->varying_to_slot[varying] >= 0); - - /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] - * array. Instead, it simply increments DstOffset for the following - * input by the number of components that should be skipped. - * - * Our hardware is unusual in that it requires us to program SO_DECLs - * for fake "hole" components, rather than simply taking the offset - * for each real varying. Each hole can have size 1, 2, 3, or 4; we - * program as many size = 4 holes as we can, then a final hole to - * accommodate the final 1, 2, or 3 remaining. 
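To see the hole splitting described above in numbers: a gap of 7 components
(for example from gl_SkipComponents) becomes two HoleFlag SO_DECLs, a
4-component one (mask 0xF) and a 3-component one (mask 0x7), because
ComponentMask can cover at most 4 components. A runnable trace of the loop
that follows:

   #include <stdio.h>

   #define MIN2(a, b) ((a) < (b) ? (a) : (b))

   /* Prints the ComponentMask of each hole SO_DECL the loop below would
    * emit for a given component gap; a skip of 7 -> 0xF then 0x7.
    */
   static void trace_hole_masks(int skip_components)
   {
      while (skip_components > 0) {
         unsigned mask = (1u << MIN2(skip_components, 4)) - 1;
         printf("hole ComponentMask = 0x%X\n", mask);
         skip_components -= 4;
      }
   }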
- */ - int skip_components = output->DstOffset - next_offset[buffer]; - - while (skip_components > 0) { - so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { - .HoleFlag = 1, - .OutputBufferSlot = output->OutputBuffer, - .ComponentMask = (1 << MIN2(skip_components, 4)) - 1, - }; - skip_components -= 4; - } - - next_offset[buffer] = output->DstOffset + output->NumComponents; - - so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { - .OutputBufferSlot = output->OutputBuffer, - .RegisterIndex = vue_map->varying_to_slot[varying], - .ComponentMask = - ((1 << output->NumComponents) - 1) << output->ComponentOffset, - }; - - if (decls[stream_id] > max_decls) - max_decls = decls[stream_id]; - } - - uint32_t *dw; - dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls, - .StreamtoBufferSelects0 = buffer_mask[0], - .StreamtoBufferSelects1 = buffer_mask[1], - .StreamtoBufferSelects2 = buffer_mask[2], - .StreamtoBufferSelects3 = buffer_mask[3], - .NumEntries0 = decls[0], - .NumEntries1 = decls[1], - .NumEntries2 = decls[2], - .NumEntries3 = decls[3]); - - for (int i = 0; i < max_decls; i++) { - GENX(SO_DECL_ENTRY_pack)( - brw, dw + 2 + i * 2, - &(struct GENX(SO_DECL_ENTRY)) { - .Stream0Decl = so_decl[0][i], - .Stream1Decl = so_decl[1][i], - .Stream2Decl = so_decl[2][i], - .Stream3Decl = so_decl[3][i], - }); - } -} - -static void -genX(upload_3dstate_so_buffers)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_TRANSFORM_FEEDBACK */ - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; -#if GFX_VER < 8 - const struct gl_transform_feedback_info *linked_xfb_info = - xfb_obj->program->sh.LinkedTransformFeedback; -#else - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) xfb_obj; -#endif - - /* Set up the up to 4 output buffers. These are the ranges defined in the - * gl_transform_feedback_object. - */ - for (int i = 0; i < 4; i++) { - struct brw_buffer_object *bufferobj = - brw_buffer_object(xfb_obj->Buffers[i]); - uint32_t start = xfb_obj->Offset[i]; - uint32_t end = ALIGN(start + xfb_obj->Size[i], 4); - uint32_t const size = end - start; - - if (!bufferobj || !size) { - brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) { - sob.SOBufferIndex = i; - sob.MOCS = brw_mocs(&brw->isl_dev, NULL); - } - continue; - } - - assert(start % 4 == 0); - struct brw_bo *bo = - brw_bufferobj_buffer(brw, bufferobj, start, size, true); - assert(end <= bo->size); - - brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) { - sob.SOBufferIndex = i; - - sob.SurfaceBaseAddress = rw_bo(bo, start); - sob.MOCS = brw_mocs(&brw->isl_dev, bo); -#if GFX_VER < 8 - sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4; - sob.SurfaceEndAddress = rw_bo(bo, end); -#else - sob.SOBufferEnable = true; - sob.StreamOffsetWriteEnable = true; - sob.StreamOutputBufferOffsetAddressEnable = true; - - sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1; - sob.StreamOutputBufferOffsetAddress = - rw_bo(brw_obj->offset_bo, i * sizeof(uint32_t)); - - if (brw_obj->zero_offsets) { - /* Zero out the offset and write that to offset_bo */ - sob.StreamOffset = 0; - } else { - /* Use offset_bo as the "Stream Offset." 
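- * The magic value 0xFFFFFFFF written below is understood to make the
- * hardware reload the offset from StreamOutputBufferOffsetAddress,
- * giving append semantics when streamout resumes in a later batch.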
             */
-            sob.StreamOffset = 0xFFFFFFFF;
-         }
-#endif
-      }
-   }
-
-#if GFX_VER >= 8
-   brw_obj->zero_offsets = false;
-#endif
-}
-
-static bool
-query_active(struct gl_query_object *q)
-{
-   return q && q->Active;
-}
-
-static void
-genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
-                               const struct brw_vue_map *vue_map)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_TRANSFORM_FEEDBACK */
-   struct gl_transform_feedback_object *xfb_obj =
-      ctx->TransformFeedback.CurrentObject;
-
-   brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
-      if (active) {
-         int urb_entry_read_offset = 0;
-         int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
-            urb_entry_read_offset;
-
-         sos.SOFunctionEnable = true;
-         sos.SOStatisticsEnable = true;
-
-         /* BRW_NEW_RASTERIZER_DISCARD */
-         if (ctx->RasterDiscard) {
-            if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
-               sos.RenderingDisable = true;
-            } else {
-               perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
-                          "query active relies on the clipper.\n");
-            }
-         }
-
-         /* _NEW_LIGHT */
-         if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
-            sos.ReorderMode = TRAILING;
-
-#if GFX_VER < 8
-         sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
-         sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
-         sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
-         sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
-#else
-         const struct gl_transform_feedback_info *linked_xfb_info =
-            xfb_obj->program->sh.LinkedTransformFeedback;
-         /* Set buffer pitches; 0 means unbound. */
-         if (xfb_obj->Buffers[0])
-            sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
-         if (xfb_obj->Buffers[1])
-            sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
-         if (xfb_obj->Buffers[2])
-            sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
-         if (xfb_obj->Buffers[3])
-            sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
-#endif
-
-         /* We always read the whole vertex. This could be reduced at some
-          * point by reading less and offsetting the register index in the
-          * SO_DECLs.
-          */
-         sos.Stream0VertexReadOffset = urb_entry_read_offset;
-         sos.Stream0VertexReadLength = urb_entry_read_length - 1;
-         sos.Stream1VertexReadOffset = urb_entry_read_offset;
-         sos.Stream1VertexReadLength = urb_entry_read_length - 1;
-         sos.Stream2VertexReadOffset = urb_entry_read_offset;
-         sos.Stream2VertexReadLength = urb_entry_read_length - 1;
-         sos.Stream3VertexReadOffset = urb_entry_read_offset;
-         sos.Stream3VertexReadLength = urb_entry_read_length - 1;
-      }
-   }
-}
-
-static void
-genX(upload_sol)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_TRANSFORM_FEEDBACK */
-   bool active = _mesa_is_xfb_active_and_unpaused(ctx);
-
-   if (active) {
-      genX(upload_3dstate_so_buffers)(brw);
-
-      /* BRW_NEW_VUE_MAP_GEOM_OUT */
-      genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
-   }
-
-   /* Finally, set up the SOL stage. This command must always follow updates
-    * to the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST)
-    * or MMIO register updates (currently performed by the kernel at each
-    * batch emit).
-    */
-   genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
-}
-
-static const struct brw_tracked_state genX(sol_state) = {
-   .dirty = {
-      .mesa = _NEW_LIGHT,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_RASTERIZER_DISCARD |
-             BRW_NEW_VUE_MAP_GEOM_OUT |
-             BRW_NEW_TRANSFORM_FEEDBACK,
-   },
-   .emit = genX(upload_sol),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_ps)(struct brw_context *brw)
-{
-   UNUSED const struct gl_context *ctx = &brw->ctx;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   /* BRW_NEW_FS_PROG_DATA */
-   const struct brw_wm_prog_data *prog_data =
-      brw_wm_prog_data(brw->wm.base.prog_data);
-   const struct brw_stage_state *stage_state = &brw->wm.base;
-
-   brw_batch_emit(brw, GENX(3DSTATE_PS), ps) {
-      /* Initialize the execution mask with VMask. Otherwise, derivatives are
-       * incorrect for subspans where some of the pixels are unlit. We believe
-       * the bit just didn't take effect in previous generations.
-       */
-      ps.VectorMaskEnable = GFX_VER >= 8;
-
-      /* Wa_1606682166:
-       * "Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes.
-       * Disable the Sampler state prefetch functionality in the SARB by
-       * programming 0xB000[30] to '1'."
-       */
-      ps.SamplerCount = GFX_VER == 11 ?
-         0 : DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
-
-      /* BRW_NEW_FS_PROG_DATA */
-      ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
-
-      if (prog_data->base.use_alt_mode)
-         ps.FloatingPointMode = Alternate;
-
-      /* Haswell requires the sample mask to be set in this packet as well as
-       * in 3DSTATE_SAMPLE_MASK; the values should match.
-       */
-
-      /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#if GFX_VERx10 == 75
-      ps.SampleMask = genX(determine_sample_mask(brw));
-#endif
-
-      /* 3DSTATE_PS expects the number of threads per PSD, which is always 64
-       * for pre-Gfx11 and 128 for Gfx11+. On Gfx11+, a programmed value of k
-       * implies 2(k+1) threads. It implicitly scales for different GT levels
-       * (which have some # of PSDs).
-       *
-       * In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
-       */
-#if GFX_VER >= 9
-      ps.MaximumNumberofThreadsPerPSD = 64 - 1;
-#elif GFX_VER >= 8
-      ps.MaximumNumberofThreadsPerPSD = 64 - 2;
-#else
-      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-#endif
-
-      if (prog_data->base.nr_params > 0 ||
-          prog_data->base.ubo_ranges[0].length > 0)
-         ps.PushConstantEnable = true;
-
-#if GFX_VER < 8
-      /* From the IVB PRM, volume 2 part 1, page 287:
-       * "This bit is inserted in the PS payload header and made available to
-       * the DataPort (either via the message header or via header bypass) to
-       * indicate that oMask data (one or two phases) is included in Render
-       * Target Write messages. If present, the oMask data is used to mask off
-       * samples."
-       */
-      ps.oMaskPresenttoRenderTarget = prog_data->uses_omask;
-
-      /* The hardware wedges if you have this bit set but don't turn on any
-       * dual source blend factors.
-       *
-       * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
-       */
-      ps.DualSourceBlendEnable = prog_data->dual_src_blend &&
-                                 (ctx->Color.BlendEnabled & 1) &&
-                                 ctx->Color._BlendUsesDualSrc & 0x1;
-
-      /* BRW_NEW_FS_PROG_DATA */
-      ps.AttributeEnable = (prog_data->num_varying_inputs != 0);
-#endif
-
-      /* From the documentation for this packet:
-       * "If the PS kernel does not need the Position XY Offsets to
-       * compute a Position Value, then this field should be programmed
-       * to POSOFFSET_NONE."
- * - * "SW Recommendation: If the PS kernel needs the Position Offsets - * to compute a Position XY value, this field should match Position - * ZW Interpolation Mode to ensure a consistent position.xyzw - * computation." - * - * We only require XY sample offsets. So, this recommendation doesn't - * look useful at the moment. We might need this in future. - */ - if (prog_data->uses_pos_offset) - ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE; - else - ps.PositionXYOffsetSelect = POSOFFSET_NONE; - - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; - ps._32PixelDispatchEnable = prog_data->dispatch_32; - - /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 - * Dispatch must not be enabled for PER_PIXEL dispatch mode." - * - * Since 16x MSAA is first introduced on SKL, we don't need to apply - * the workaround on any older hardware. - * - * BRW_NEW_NUM_SAMPLES - */ - if (GFX_VER >= 9 && !prog_data->persample_dispatch && - brw->num_samples == 16) { - assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); - ps._32PixelDispatchEnable = false; - } - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); - ps.DispatchGRFStartRegisterForConstantSetupData1 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); - ps.DispatchGRFStartRegisterForConstantSetupData2 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); - - ps.KernelStartPointer0 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 0); - ps.KernelStartPointer1 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 1); - ps.KernelStartPointer2 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 2); - - if (prog_data->base.total_scratch) { - ps.ScratchSpaceBasePointer = - rw_32_bo(stage_state->scratch_bo, - ffs(stage_state->per_thread_scratch) - 11); - } - } -} - -static const struct brw_tracked_state genX(ps_state) = { - .dirty = { - .mesa = _NEW_MULTISAMPLE | - (GFX_VER < 8 ? _NEW_BUFFERS | - _NEW_COLOR - : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - (GFX_VER >= 9 ? 
BRW_NEW_NUM_SAMPLES : 0),
-   },
-   .emit = genX(upload_ps),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_hs_state)(struct brw_context *brw)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_stage_state *stage_state = &brw->tcs.base;
-   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
-   const struct brw_vue_prog_data *vue_prog_data =
-      brw_vue_prog_data(stage_prog_data);
-
-   /* BRW_NEW_TCS_PROG_DATA */
-   struct brw_tcs_prog_data *tcs_prog_data =
-      brw_tcs_prog_data(stage_prog_data);
-
-   if (!tcs_prog_data) {
-      brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
-   } else {
-      brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
-         INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
-
-         hs.InstanceCount = tcs_prog_data->instances - 1;
-         hs.IncludeVertexHandles = true;
-
-         hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
-
-#if GFX_VER >= 9
-         hs.DispatchMode = vue_prog_data->dispatch_mode;
-         hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
-#endif
-      }
-   }
-}
-
-static const struct brw_tracked_state genX(hs_state) = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_TCS_PROG_DATA |
-             BRW_NEW_TESS_PROGRAMS,
-   },
-   .emit = genX(upload_hs_state),
-};
-
-static void
-genX(upload_ds_state)(struct brw_context *brw)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   const struct brw_stage_state *stage_state = &brw->tes.base;
-   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
-
-   /* BRW_NEW_TES_PROG_DATA */
-   const struct brw_tes_prog_data *tes_prog_data =
-      brw_tes_prog_data(stage_prog_data);
-   const struct brw_vue_prog_data *vue_prog_data =
-      brw_vue_prog_data(stage_prog_data);
-
-   if (!tes_prog_data) {
-      brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
-   } else {
-      assert(GFX_VER < 11 ||
-             vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
-
-      brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
-         INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
-
-         ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
-         ds.ComputeWCoordinateEnable =
-            tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
-
-#if GFX_VER >= 8
-         if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
-            ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
-         ds.UserClipDistanceCullTestEnableBitmask =
-            vue_prog_data->cull_distance_mask;
-#endif
-      }
-   }
-}
-
-static const struct brw_tracked_state genX(ds_state) = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_TESS_PROGRAMS |
-             BRW_NEW_TES_PROG_DATA,
-   },
-   .emit = genX(upload_ds_state),
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-upload_te_state(struct brw_context *brw)
-{
-   /* BRW_NEW_TESS_PROGRAMS */
-   bool active = brw->programs[MESA_SHADER_TESS_EVAL];
-
-   /* BRW_NEW_TES_PROG_DATA */
-   const struct brw_tes_prog_data *tes_prog_data =
-      brw_tes_prog_data(brw->tes.base.prog_data);
-
-   if (active) {
-      brw_batch_emit(brw, GENX(3DSTATE_TE), te) {
-         te.Partitioning = tes_prog_data->partitioning;
-         te.OutputTopology = tes_prog_data->output_topology;
-         te.TEDomain = tes_prog_data->domain;
-         te.TEEnable = true;
-         te.MaximumTessellationFactorOdd = 63.0;
-         te.MaximumTessellationFactorNotOdd = 64.0;
-      }
-   } else {
-      brw_batch_emit(brw, GENX(3DSTATE_TE), te);
-   }
-}
-
-static const struct brw_tracked_state genX(te_state) = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_TES_PROG_DATA |
-
BRW_NEW_TESS_PROGRAMS, - }, - .emit = upload_te_state, -}; - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_tes_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tes.base; - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL]; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - gfx6_upload_push_constants(brw, tep, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(tes_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TES_PROG_DATA, - }, - .emit = genX(upload_tes_push_constants), -}; - -static void -genX(upload_tcs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tcs.base; - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL]; - - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - - gfx6_upload_push_constants(brw, tcp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(tcs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_DEFAULT_TESS_LEVELS | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TCS_PROG_DATA, - }, - .emit = genX(upload_tcs_push_constants), -}; - -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -static void -genX(upload_cs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->cs.base; - - /* BRW_NEW_COMPUTE_PROGRAM */ - const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE]; - - if (cp) { - /* BRW_NEW_CS_PROG_DATA */ - struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data(brw->cs.base.prog_data); - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE); - brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state); - } -} - -const struct brw_tracked_state genX(cs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_COMPUTE_PROGRAM | - BRW_NEW_CS_PROG_DATA, - }, - .emit = genX(upload_cs_push_constants), -}; - -/** - * Creates a new CS constant buffer reflecting the current CS program's - * constants, if needed by the CS program. 
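- *
- * (When the program needs no pull constants -- i.e. every uniform fits in
- * push space -- brw_upload_pull_constants() is expected to simply drop any
- * stale constant buffer rather than build a new one.)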
- */ -static void -genX(upload_cs_pull_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->cs.base; - - /* BRW_NEW_COMPUTE_PROGRAM */ - struct brw_program *cp = - (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - - /* BRW_NEW_CS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state genX(cs_pull_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_COMPUTE_PROGRAM | - BRW_NEW_CS_PROG_DATA, - }, - .emit = genX(upload_cs_pull_constants), -}; - -static void -genX(upload_cs_state)(struct brw_context *brw) -{ - if (!brw->cs.base.prog_data) - return; - - uint32_t offset; - uint32_t *desc = (uint32_t*) brw_state_batch( - brw, GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t), 64, - &offset); - - struct brw_stage_state *stage_state = &brw->cs.base; - struct brw_stage_prog_data *prog_data = stage_state->prog_data; - struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(devinfo, cs_prog_data, brw->compute.group_size); - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - brw_emit_buffer_surface_state( - brw, &stage_state->surf_offset[ - prog_data->binding_table.shader_time_start], - brw->shader_time.bo, 0, ISL_FORMAT_RAW, - brw->shader_time.bo->size, 1, - RELOC_WRITE); - } - - uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes, - 32, &stage_state->bind_bo_offset); - - /* The MEDIA_VFE_STATE documentation for Gfx8+ says: - * - * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless - * the only bits that are changed are scoreboard related: Scoreboard - * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For - * these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient." - * - * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL", - * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); - - brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) { - if (prog_data->total_scratch) { - uint32_t per_thread_scratch_value; - - if (GFX_VER >= 8) { - /* Broadwell's Per Thread Scratch Space is in the range [0, 11] - * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M. - */ - per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11; - } else if (GFX_VERx10 == 75) { - /* Haswell's Per Thread Scratch Space is in the range [0, 10] - * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. - */ - per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12; - } else { - /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB] - * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB. - */ - per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1; - } - vfe.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); - vfe.PerThreadScratchSpace = per_thread_scratch_value; - } - - vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * devinfo->subslice_total - 1; - vfe.NumberofURBEntries = GFX_VER >= 8 ? 
2 : 0; -#if GFX_VER < 11 - vfe.ResetGatewayTimer = - Resettingrelativetimerandlatchingtheglobaltimestamp; -#endif -#if GFX_VER < 9 - vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol; -#endif -#if GFX_VER == 7 - vfe.GPGPUMode = true; -#endif - - /* We are uploading duplicated copies of push constant uniforms for each - * thread. Although the local id data needs to vary per thread, it won't - * change for other uniform data. Unfortunately this duplication is - * required for gfx7. As of Haswell, this duplication can be avoided, - * but this older mechanism with duplicated data continues to work. - * - * FINISHME: As of Haswell, we could make use of the - * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" - * field to only store one copy of uniform data. - * - * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage" - * which is described in the GPGPU_WALKER command and in the Broadwell - * PRM Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of - * Operations => GPGPU Mode => Indirect Payload Storage. - * - * Note: The constant data is built in brw_upload_cs_push_constants - * below. - */ - vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0; - - const uint32_t vfe_curbe_allocation = - ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads + - cs_prog_data->push.cross_thread.regs, 2); - vfe.CURBEAllocationSize = vfe_curbe_allocation; - } - - const unsigned push_const_size = - brw_cs_push_const_total_size(cs_prog_data, dispatch.threads); - if (push_const_size > 0) { - brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = ALIGN(push_const_size, 64); - curbe.CURBEDataStartAddress = stage_state->push_const_offset; - } - } - - /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ - memcpy(bind, stage_state->surf_offset, - prog_data->binding_table.size_bytes); - const uint64_t ksp = brw->cs.base.prog_offset + - brw_cs_prog_data_prog_offset(cs_prog_data, - dispatch.simd_size); - const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = { - .KernelStartPointer = ksp, - .SamplerStatePointer = stage_state->sampler_offset, - /* Wa_1606682166 */ - .SamplerCount = GFX_VER == 11 ? 
0 : - DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), - .BindingTablePointer = stage_state->bind_bo_offset, - .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs, - .NumberofThreadsinGPGPUThreadGroup = dispatch.threads, - .SharedLocalMemorySize = encode_slm_size(GFX_VER, - prog_data->total_shared), - .BarrierEnable = cs_prog_data->uses_barrier, -#if GFX_VERx10 >= 75 - .CrossThreadConstantDataReadLength = - cs_prog_data->push.cross_thread.regs, -#endif - }; - - GENX(INTERFACE_DESCRIPTOR_DATA_pack)(brw, desc, &idd); - - brw_batch_emit(brw, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) { - load.InterfaceDescriptorTotalLength = - GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); - load.InterfaceDescriptorDataStartAddress = offset; - } -} - -static const struct brw_tracked_state genX(cs_state) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CS_PROG_DATA | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_SURFACES, - }, - .emit = genX(upload_cs_state) -}; - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -#define MI_PREDICATE_SRC0 0x2400 -#define MI_PREDICATE_SRC1 0x2408 - -static void -prepare_indirect_gpgpu_walker(struct brw_context *brw) -{ - GLintptr indirect_offset = brw->compute.num_work_groups_offset; - struct brw_bo *bo = brw->compute.num_work_groups_bo; - - emit_lrm(brw, GPGPU_DISPATCHDIMX, ro_bo(bo, indirect_offset + 0)); - emit_lrm(brw, GPGPU_DISPATCHDIMY, ro_bo(bo, indirect_offset + 4)); - emit_lrm(brw, GPGPU_DISPATCHDIMZ, ro_bo(bo, indirect_offset + 8)); - -#if GFX_VER <= 7 - /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ - emit_lri(brw, MI_PREDICATE_SRC0 + 4, 0); - emit_lri(brw, MI_PREDICATE_SRC1 , 0); - emit_lri(brw, MI_PREDICATE_SRC1 + 4, 0); - - /* Load compute_dispatch_indirect_x_size into SRC0 */ - emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 0)); - - /* predicate = (compute_dispatch_indirect_x_size == 0); */ - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOAD; - mip.CombineOperation = COMBINE_SET; - mip.CompareOperation = COMPARE_SRCS_EQUAL; - } - - /* Load compute_dispatch_indirect_y_size into SRC0 */ - emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 4)); - - /* predicate |= (compute_dispatch_indirect_y_size == 0); */ - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOAD; - mip.CombineOperation = COMBINE_OR; - mip.CompareOperation = COMPARE_SRCS_EQUAL; - } - - /* Load compute_dispatch_indirect_z_size into SRC0 */ - emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 8)); - - /* predicate |= (compute_dispatch_indirect_z_size == 0); */ - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOAD; - mip.CombineOperation = COMBINE_OR; - mip.CompareOperation = COMPARE_SRCS_EQUAL; - } - - /* predicate = !predicate; */ -#define COMPARE_FALSE 1 - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOADINV; - mip.CombineOperation = COMBINE_OR; - mip.CompareOperation = COMPARE_FALSE; - } -#endif -} - -static void -genX(emit_gpgpu_walker)(struct brw_context *brw) -{ - const GLuint *num_groups = brw->compute.num_work_groups; - - bool indirect = brw->compute.num_work_groups_bo != NULL; - if (indirect) - prepare_indirect_gpgpu_walker(brw); - - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(&brw->screen->devinfo, - brw_cs_prog_data(brw->cs.base.prog_data), - brw->compute.group_size); 
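-
-   /* A worked example of the encoding below, with illustrative numbers
-    * rather than anything from the PRMs: a local group of 100 invocations
-    * compiled at dispatch.simd_size == 16 gives dispatch.threads ==
-    * DIV_ROUND_UP(100, 16) == 7, so ThreadWidthCounterMaximum becomes 6,
-    * SIMDSize becomes 16 / 16 == 1, and dispatch.right_mask enables only
-    * the four lanes that are live in the final, partially filled thread.
-    */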
- - brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) { - ggw.IndirectParameterEnable = indirect; - ggw.PredicateEnable = GFX_VER <= 7 && indirect; - ggw.SIMDSize = dispatch.simd_size / 16; - ggw.ThreadDepthCounterMaximum = 0; - ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = dispatch.threads - 1; - ggw.ThreadGroupIDXDimension = num_groups[0]; - ggw.ThreadGroupIDYDimension = num_groups[1]; - ggw.ThreadGroupIDZDimension = num_groups[2]; - ggw.RightExecutionMask = dispatch.right_mask; - ggw.BottomExecutionMask = 0xffffffff; - } - - brw_batch_emit(brw, GENX(MEDIA_STATE_FLUSH), msf); -} - -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -static void -genX(upload_raster)(struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - const bool flip_y = ctx->DrawBuffer->FlipY; - - /* _NEW_POLYGON */ - const struct gl_polygon_attrib *polygon = &ctx->Polygon; - - /* _NEW_POINT */ - const struct gl_point_attrib *point = &ctx->Point; - - brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) { - if (brw->polygon_front_bit != flip_y) - raster.FrontWinding = CounterClockwise; - - if (polygon->CullFlag) { - switch (polygon->CullFaceMode) { - case GL_FRONT: - raster.CullMode = CULLMODE_FRONT; - break; - case GL_BACK: - raster.CullMode = CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - raster.CullMode = CULLMODE_BOTH; - break; - default: - unreachable("not reached"); - } - } else { - raster.CullMode = CULLMODE_NONE; - } - - raster.SmoothPointEnable = point->SmoothFlag; - - raster.DXMultisampleRasterizationEnable = - _mesa_is_multisample_enabled(ctx); - - raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill; - raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine; - raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint; - - switch (polygon->FrontMode) { - case GL_FILL: - raster.FrontFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - raster.FrontFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - raster.FrontFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - switch (polygon->BackMode) { - case GL_FILL: - raster.BackFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - raster.BackFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - raster.BackFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - /* _NEW_LINE */ - raster.AntialiasingEnable = ctx->Line.SmoothFlag; - -#if GFX_VER == 10 - /* _NEW_BUFFERS - * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1. 
-       */
-      const bool multisampled_fbo =
-         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-      if (multisampled_fbo)
-         raster.AntialiasingEnable = false;
-#endif
-
-      /* _NEW_SCISSOR */
-      raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
-
-      /* _NEW_TRANSFORM */
-#if GFX_VER < 9
-      if (!(ctx->Transform.DepthClampNear &&
-            ctx->Transform.DepthClampFar))
-         raster.ViewportZClipTestEnable = true;
-#endif
-
-#if GFX_VER >= 9
-      if (!ctx->Transform.DepthClampNear)
-         raster.ViewportZNearClipTestEnable = true;
-
-      if (!ctx->Transform.DepthClampFar)
-         raster.ViewportZFarClipTestEnable = true;
-#endif
-
-      /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
-#if GFX_VER >= 9
-      raster.ConservativeRasterizationEnable =
-         ctx->IntelConservativeRasterization;
-#endif
-
-      raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
-      raster.GlobalDepthOffsetScale = polygon->OffsetFactor;
-
-      raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
-   }
-}
-
-static const struct brw_tracked_state genX(raster_state) = {
-   .dirty = {
-      .mesa = _NEW_BUFFERS |
-              _NEW_LINE |
-              _NEW_MULTISAMPLE |
-              _NEW_POINT |
-              _NEW_POLYGON |
-              _NEW_SCISSOR |
-              _NEW_TRANSFORM,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_CONSERVATIVE_RASTERIZATION,
-   },
-   .emit = genX(upload_raster),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-static void
-genX(upload_ps_extra)(struct brw_context *brw)
-{
-   UNUSED struct gl_context *ctx = &brw->ctx;
-
-   const struct brw_wm_prog_data *prog_data =
-      brw_wm_prog_data(brw->wm.base.prog_data);
-
-   brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
-      psx.PixelShaderValid = true;
-      psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
-      psx.PixelShaderKillsPixel = prog_data->uses_kill;
-      psx.AttributeEnable = prog_data->num_varying_inputs != 0;
-      psx.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
-      psx.PixelShaderUsesSourceW = prog_data->uses_src_w;
-      psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
-
-      /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
-      if (prog_data->uses_sample_mask) {
-#if GFX_VER >= 9
-         if (prog_data->post_depth_coverage)
-            psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
-         else if (prog_data->inner_coverage &&
-                  ctx->IntelConservativeRasterization)
-            psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
-         else
-            psx.InputCoverageMaskState = ICMS_NORMAL;
-#else
-         psx.PixelShaderUsesInputCoverageMask = true;
-#endif
-      }
-
-      psx.oMaskPresenttoRenderTarget = prog_data->uses_omask;
-#if GFX_VER >= 9
-      psx.PixelShaderPullsBary = prog_data->pulls_bary;
-      psx.PixelShaderComputesStencil = prog_data->computed_stencil;
-#endif
-
-      /* The stricter cross-primitive coherency guarantees that the hardware
-       * gives us with the "Accesses UAV" bit set for at least one shader stage
-       * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
-       * are redundant within the current image, atomic counter and SSBO GL
-       * APIs, which all have very loose ordering and coherency requirements
-       * and generally rely on the application to insert explicit barriers when
-       * a shader invocation is expected to see the memory writes performed by
-       * the invocations of some previous primitive. Regardless of the value
-       * of "UAV coherency required", the "Accesses UAV" bits will implicitly
-       * cause a DC flush, useless in most cases, when the lowermost stage with
-       * the bit set finishes execution.
- * - * It would be nice to disable it, but in some cases we can't because on - * Gfx8+ it also has an influence on rasterization via the PS UAV-only - * signal (which could be set independently from the coherency mechanism - * in the 3DSTATE_WM command on Gfx7), and because in some cases it will - * determine whether the hardware skips execution of the fragment shader - * or not via the ThreadDispatchEnable signal. However if we know that - * GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and - * GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any - * difference so we may just disable it here. - * - * Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't - * take into account KillPixels when no depth or stencil writes are - * enabled. In order for occlusion queries to work correctly with no - * attachments, we need to force-enable here. - * - * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | - * _NEW_COLOR - */ - if ((prog_data->has_side_effects || prog_data->uses_kill) && - !brw_color_buffer_write_enabled(brw)) - psx.PixelShaderHasUAV = true; - } -} - -const struct brw_tracked_state genX(ps_extra) = { - .dirty = { - .mesa = _NEW_BUFFERS | _NEW_COLOR, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_CONSERVATIVE_RASTERIZATION, - }, - .emit = genX(upload_ps_extra), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -static void -genX(upload_ps_blend)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; - const bool buffer0_is_integer = ctx->DrawBuffer->_IntegerBuffers & 0x1; - - /* _NEW_COLOR */ - struct gl_colorbuffer_attrib *color = &ctx->Color; - - brw_batch_emit(brw, GENX(3DSTATE_PS_BLEND), pb) { - /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */ - pb.HasWriteableRT = brw_color_buffer_write_enabled(brw); - - bool alpha_to_one = false; - - if (!buffer0_is_integer) { - /* _NEW_MULTISAMPLE */ - - if (_mesa_is_multisample_enabled(ctx)) { - pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage; - alpha_to_one = ctx->Multisample.SampleAlphaToOne; - } - - pb.AlphaTestEnable = color->AlphaEnabled; - } - - /* Used for implementing the following bit of GL_EXT_texture_integer: - * "Per-fragment operations that require floating-point color - * components, including multisample alpha operations, alpha test, - * blending, and dithering, have no effect when the corresponding - * colors are written to an integer color buffer." - * - * The OpenGL specification 3.3 (page 196), section 4.1.3 says: - * "If drawbuffer zero is not NONE and the buffer it references has an - * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE - * operations are skipped." - */ - if (rb && !buffer0_is_integer && (color->BlendEnabled & 1)) { - GLenum eqRGB = color->Blend[0].EquationRGB; - GLenum eqA = color->Blend[0].EquationA; - GLenum srcRGB = color->Blend[0].SrcRGB; - GLenum dstRGB = color->Blend[0].DstRGB; - GLenum srcA = color->Blend[0].SrcA; - GLenum dstA = color->Blend[0].DstA; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) - srcRGB = dstRGB = GL_ONE; - - if (eqA == GL_MIN || eqA == GL_MAX) - srcA = dstA = GL_ONE; - - /* Due to hardware limitations, the destination may have information - * in an alpha channel even when the format specifies no alpha - * channel. 
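- * (For instance, an RGBX renderbuffer still has physical storage for the
- * X channel, so a GL_DST_ALPHA blend factor could read stale data there
- * instead of the implicit 1.0.)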
In order to avoid getting any incorrect blending due to - * that alpha channel, coerce the blend factors to values that will - * not read the alpha channel, but will instead use the correct - * implicit value for alpha. - */ - if (!_mesa_base_format_has_channel(rb->_BaseFormat, - GL_TEXTURE_ALPHA_TYPE)) { - srcRGB = brw_fix_xRGB_alpha(srcRGB); - srcA = brw_fix_xRGB_alpha(srcA); - dstRGB = brw_fix_xRGB_alpha(dstRGB); - dstA = brw_fix_xRGB_alpha(dstA); - } - - /* Alpha to One doesn't work with Dual Color Blending. Override - * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO. - */ - if (alpha_to_one && color->_BlendUsesDualSrc & 0x1) { - srcRGB = fix_dual_blend_alpha_to_one(srcRGB); - srcA = fix_dual_blend_alpha_to_one(srcA); - dstRGB = fix_dual_blend_alpha_to_one(dstRGB); - dstA = fix_dual_blend_alpha_to_one(dstA); - } - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - /* The Dual Source Blending documentation says: - * - * "If SRC1 is included in a src/dst blend factor and - * a DualSource RT Write message is not used, results - * are UNDEFINED. (This reflects the same restriction in DX APIs, - * where undefined results are produced if “o1” is not written - * by a PS – there are no default values defined). - * If SRC1 is not included in a src/dst blend factor, - * dual source blending must be disabled." - * - * There is no way to gracefully fix this undefined situation - * so we just disable the blending to prevent possible issues. - */ - pb.ColorBufferBlendEnable = - !(color->_BlendUsesDualSrc & 0x1) || wm_prog_data->dual_src_blend; - pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA); - pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA); - pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB); - pb.DestinationBlendFactor = brw_translate_blend_factor(dstRGB); - - pb.IndependentAlphaBlendEnable = - srcA != srcRGB || dstA != dstRGB || eqA != eqRGB; - } - } -} - -static const struct brw_tracked_state genX(ps_blend) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA, - }, - .emit = genX(upload_ps_blend) -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -static void -genX(emit_vf_topology)(struct brw_context *brw) -{ - brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) { - vftopo.PrimitiveTopologyType = brw->primitive; - } -} - -static const struct brw_tracked_state genX(vf_topology) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BLORP | - BRW_NEW_PRIMITIVE, - }, - .emit = genX(emit_vf_topology), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -static void -genX(emit_mi_report_perf_count)(struct brw_context *brw, - struct brw_bo *bo, - uint32_t offset_in_bytes, - uint32_t report_id) -{ - brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) { - mi_rpc.MemoryAddress = ggtt_bo(bo, offset_in_bytes); - mi_rpc.ReportID = report_id; - } -} -#endif - -/* ---------------------------------------------------------------------- */ - -/** - * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet. 
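- *
- * Note that only the VS flavor of the packet is named in the emit below;
- * overriding _3DCommandSubOpcode (43..47) is what selects which stage's
- * sampler-state pointer the packet actually updates.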
- */ -static void -genX(emit_sampler_state_pointers_xs)(UNUSED struct brw_context *brw, - UNUSED struct brw_stage_state *stage_state) -{ -#if GFX_VER >= 7 - static const uint16_t packet_headers[] = { - [MESA_SHADER_VERTEX] = 43, - [MESA_SHADER_TESS_CTRL] = 44, - [MESA_SHADER_TESS_EVAL] = 45, - [MESA_SHADER_GEOMETRY] = 46, - [MESA_SHADER_FRAGMENT] = 47, - }; - - /* Ivybridge requires a workaround flush before VS packets. */ - if (GFX_VERx10 == 70 && - stage_state->stage == MESA_SHADER_VERTEX) { - gfx7_emit_vs_workaround_flush(brw); - } - - brw_batch_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { - ptr._3DCommandSubOpcode = packet_headers[stage_state->stage]; - ptr.PointertoVSSamplerState = stage_state->sampler_offset; - } -#endif -} - -UNUSED static bool -has_component(mesa_format format, int i) -{ - if (_mesa_is_format_color_format(format)) - return _mesa_format_has_color_component(format, i); - - /* depth and stencil have only one component */ - return i == 0; -} - -/** - * Upload SAMPLER_BORDER_COLOR_STATE. - */ -static void -genX(upload_default_color)(struct brw_context *brw, - const struct gl_sampler_object *sampler, - UNUSED mesa_format format, - GLenum base_format, - bool is_integer_format, bool is_stencil_sampling, - uint32_t *sdc_offset) -{ - union gl_color_union color; - - switch (base_format) { - case GL_DEPTH_COMPONENT: - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. - */ - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = sampler->Attrib.state.border_color.ui[0]; - break; - case GL_ALPHA: - color.ui[0] = 0u; - color.ui[1] = 0u; - color.ui[2] = 0u; - color.ui[3] = sampler->Attrib.state.border_color.ui[3]; - break; - case GL_INTENSITY: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = sampler->Attrib.state.border_color.ui[0]; - break; - case GL_LUMINANCE: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = float_as_int(1.0); - break; - case GL_LUMINANCE_ALPHA: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = sampler->Attrib.state.border_color.ui[3]; - break; - default: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[1]; - color.ui[2] = sampler->Attrib.state.border_color.ui[2]; - color.ui[3] = sampler->Attrib.state.border_color.ui[3]; - break; - } - - /* In some cases we use an RGBA surface format for GL RGB textures, - * where we've initialized the A channel to 1.0. We also have to set - * the border color alpha to 1.0 in that case. 
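- *
- * (For example, a GL_RGB8 texture stored as RGBA8: a sample that lands on
- * the border must still return alpha == 1.0, so the alpha channel of the
- * border color is forced to 1.0 below instead of taking the app's value.)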
- */ - if (base_format == GL_RGB) - color.ui[3] = float_as_int(1.0); - - int alignment = 32; - if (GFX_VER >= 8) { - alignment = 64; - } else if (GFX_VERx10 == 75 && (is_integer_format || is_stencil_sampling)) { - alignment = 512; - } - - uint32_t *sdc = brw_state_batch( - brw, GENX(SAMPLER_BORDER_COLOR_STATE_length) * sizeof(uint32_t), - alignment, sdc_offset); - - struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 }; - -#define ASSIGN(dst, src) \ - do { \ - dst = src; \ - } while (0) - -#define ASSIGNu16(dst, src) \ - do { \ - dst = (uint16_t)src; \ - } while (0) - -#define ASSIGNu8(dst, src) \ - do { \ - dst = (uint8_t)src; \ - } while (0) - -#define BORDER_COLOR_ATTR(macro, _color_type, src) \ - macro(state.BorderColor ## _color_type ## Red, src[0]); \ - macro(state.BorderColor ## _color_type ## Green, src[1]); \ - macro(state.BorderColor ## _color_type ## Blue, src[2]); \ - macro(state.BorderColor ## _color_type ## Alpha, src[3]); - -#if GFX_VER >= 8 - /* On Broadwell, the border color is represented as four 32-bit floats, - * integers, or unsigned values, interpreted according to the surface - * format. This matches the sampler->BorderColor union exactly; just - * memcpy the values. - */ - BORDER_COLOR_ATTR(ASSIGN, 32bit, color.ui); -#elif GFX_VERx10 == 75 - if (is_integer_format || is_stencil_sampling) { - bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling; - const int bits_per_channel = - _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS); - - /* From the Haswell PRM, "Command Reference: Structures", Page 36: - * "If any color channel is missing from the surface format, - * corresponding border color should be programmed as zero and if - * alpha channel is missing, corresponding Alpha border color should - * be programmed as 1." - */ - unsigned c[4] = { 0, 0, 0, 1 }; - for (int i = 0; i < 4; i++) { - if (has_component(format, i)) - c[i] = color.ui[i]; - } - - switch (bits_per_channel) { - case 8: - /* Copy RGBA in order. */ - BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c); - break; - case 10: - /* R10G10B10A2_UINT is treated like a 16-bit format. */ - case 16: - BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c); - break; - case 32: - if (base_format == GL_RG) { - /* Careful inspection of the tables reveals that for RG32 formats, - * the green channel needs to go where blue normally belongs. - */ - state.BorderColor32bitRed = c[0]; - state.BorderColor32bitBlue = c[1]; - state.BorderColor32bitAlpha = 1; - } else { - /* Copy RGBA in order. 
*/ - BORDER_COLOR_ATTR(ASSIGN, 32bit, c); - } - break; - default: - assert(!"Invalid number of bits per channel in integer format."); - break; - } - } else { - BORDER_COLOR_ATTR(ASSIGN, Float, color.f); - } -#elif GFX_VER == 5 || GFX_VER == 6 - BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color.f); - BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color.f); - BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color.f); - -#define MESA_FLOAT_TO_HALF(dst, src) \ - dst = _mesa_float_to_half(src); - - BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color.f); - -#undef MESA_FLOAT_TO_HALF - - state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8; - state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8; - state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8; - state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8; - - BORDER_COLOR_ATTR(ASSIGN, Float, color.f); -#elif GFX_VER == 4 - BORDER_COLOR_ATTR(ASSIGN, , color.f); -#else - BORDER_COLOR_ATTR(ASSIGN, Float, color.f); -#endif - -#undef ASSIGN -#undef BORDER_COLOR_ATTR - - GENX(SAMPLER_BORDER_COLOR_STATE_pack)(brw, sdc, &state); -} - -static uint32_t -translate_wrap_mode(GLenum wrap, UNUSED bool using_nearest) -{ - switch (wrap) { - case GL_REPEAT: - return TCM_WRAP; - case GL_CLAMP: -#if GFX_VER >= 8 - /* GL_CLAMP is the weird mode where coordinates are clamped to - * [0.0, 1.0], so linear filtering of coordinates outside of - * [0.0, 1.0] give you half edge texel value and half border - * color. - * - * Gfx8+ supports this natively. - */ - return TCM_HALF_BORDER; -#else - /* On Gfx4-7.5, we clamp the coordinates in the fragment shader - * and set clamp_border here, which gets the result desired. - * We just use clamp(_to_edge) for nearest, because for nearest - * clamping to 1.0 gives border color instead of the desired - * edge texels. - */ - if (using_nearest) - return TCM_CLAMP; - else - return TCM_CLAMP_BORDER; -#endif - case GL_CLAMP_TO_EDGE: - return TCM_CLAMP; - case GL_CLAMP_TO_BORDER: - return TCM_CLAMP_BORDER; - case GL_MIRRORED_REPEAT: - return TCM_MIRROR; - case GL_MIRROR_CLAMP_TO_EDGE: - return TCM_MIRROR_ONCE; - default: - return TCM_WRAP; - } -} - -/** - * Return true if the given wrap mode requires the border color to exist. - */ -static bool -wrap_mode_needs_border_color(unsigned wrap_mode) -{ -#if GFX_VER >= 8 - return wrap_mode == TCM_CLAMP_BORDER || - wrap_mode == TCM_HALF_BORDER; -#else - return wrap_mode == TCM_CLAMP_BORDER; -#endif -} - -/** - * Sets the sampler state for a single unit based off of the sampler key - * entry. - */ -static void -genX(update_sampler_state)(struct brw_context *brw, - GLenum target, bool tex_cube_map_seamless, - GLfloat tex_unit_lod_bias, - mesa_format format, GLenum base_format, - const struct gl_texture_object *texObj, - const struct gl_sampler_object *sampler, - uint32_t *sampler_state) -{ - struct GENX(SAMPLER_STATE) samp_st = { 0 }; - - /* Select min and mip filters. 
    */
-   switch (sampler->Attrib.MinFilter) {
-   case GL_NEAREST:
-      samp_st.MinModeFilter = MAPFILTER_NEAREST;
-      samp_st.MipModeFilter = MIPFILTER_NONE;
-      break;
-   case GL_LINEAR:
-      samp_st.MinModeFilter = MAPFILTER_LINEAR;
-      samp_st.MipModeFilter = MIPFILTER_NONE;
-      break;
-   case GL_NEAREST_MIPMAP_NEAREST:
-      samp_st.MinModeFilter = MAPFILTER_NEAREST;
-      samp_st.MipModeFilter = MIPFILTER_NEAREST;
-      break;
-   case GL_LINEAR_MIPMAP_NEAREST:
-      samp_st.MinModeFilter = MAPFILTER_LINEAR;
-      samp_st.MipModeFilter = MIPFILTER_NEAREST;
-      break;
-   case GL_NEAREST_MIPMAP_LINEAR:
-      samp_st.MinModeFilter = MAPFILTER_NEAREST;
-      samp_st.MipModeFilter = MIPFILTER_LINEAR;
-      break;
-   case GL_LINEAR_MIPMAP_LINEAR:
-      samp_st.MinModeFilter = MAPFILTER_LINEAR;
-      samp_st.MipModeFilter = MIPFILTER_LINEAR;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
-   /* Select mag filter. */
-   samp_st.MagModeFilter = sampler->Attrib.MagFilter == GL_LINEAR ?
-      MAPFILTER_LINEAR : MAPFILTER_NEAREST;
-
-   /* Enable anisotropic filtering if desired. */
-   samp_st.MaximumAnisotropy = RATIO21;
-
-   if (sampler->Attrib.MaxAnisotropy > 1.0f) {
-      if (samp_st.MinModeFilter == MAPFILTER_LINEAR)
-         samp_st.MinModeFilter = MAPFILTER_ANISOTROPIC;
-      if (samp_st.MagModeFilter == MAPFILTER_LINEAR)
-         samp_st.MagModeFilter = MAPFILTER_ANISOTROPIC;
-
-      if (sampler->Attrib.MaxAnisotropy > 2.0f) {
-         samp_st.MaximumAnisotropy =
-            MIN2((sampler->Attrib.MaxAnisotropy - 2) / 2, RATIO161);
-      }
-   }
-
-   /* Set address rounding bits if not using nearest filtering. */
-   if (samp_st.MinModeFilter != MAPFILTER_NEAREST) {
-      samp_st.UAddressMinFilterRoundingEnable = true;
-      samp_st.VAddressMinFilterRoundingEnable = true;
-      samp_st.RAddressMinFilterRoundingEnable = true;
-   }
-
-   if (samp_st.MagModeFilter != MAPFILTER_NEAREST) {
-      samp_st.UAddressMagFilterRoundingEnable = true;
-      samp_st.VAddressMagFilterRoundingEnable = true;
-      samp_st.RAddressMagFilterRoundingEnable = true;
-   }
-
-   bool either_nearest =
-      sampler->Attrib.MinFilter == GL_NEAREST ||
-      sampler->Attrib.MagFilter == GL_NEAREST;
-   unsigned wrap_s = translate_wrap_mode(sampler->Attrib.WrapS, either_nearest);
-   unsigned wrap_t = translate_wrap_mode(sampler->Attrib.WrapT, either_nearest);
-   unsigned wrap_r = translate_wrap_mode(sampler->Attrib.WrapR, either_nearest);
-
-   if (target == GL_TEXTURE_CUBE_MAP ||
-       target == GL_TEXTURE_CUBE_MAP_ARRAY) {
-      /* Cube maps must use the same wrap mode for all three coordinate
-       * dimensions. Prior to Haswell, only CUBE and CLAMP are valid.
-       *
-       * Ivybridge and Baytrail seem to have problems with CUBE mode and
-       * integer formats. Fall back to CLAMP for now.
-       */
-      if ((tex_cube_map_seamless || sampler->Attrib.CubeMapSeamless) &&
-          !(GFX_VERx10 == 70 && texObj->_IsIntegerFormat)) {
-         wrap_s = TCM_CUBE;
-         wrap_t = TCM_CUBE;
-         wrap_r = TCM_CUBE;
-      } else {
-         wrap_s = TCM_CLAMP;
-         wrap_t = TCM_CLAMP;
-         wrap_r = TCM_CLAMP;
-      }
-   } else if (target == GL_TEXTURE_1D) {
-      /* There's a bug in 1D texture sampling - it actually pays
-       * attention to the wrap_t value, though it should not.
-       * Override the wrap_t value here to GL_REPEAT to keep
-       * any nonexistent border pixels from floating in.
-       */
-      wrap_t = TCM_WRAP;
-   }
-
-   samp_st.TCXAddressControlMode = wrap_s;
-   samp_st.TCYAddressControlMode = wrap_t;
-   samp_st.TCZAddressControlMode = wrap_r;
-
-   samp_st.ShadowFunction =
-      sampler->Attrib.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB ?
-      brw_translate_shadow_compare_func(sampler->Attrib.CompareFunc) : 0;
-
-#if GFX_VER >= 7
-   /* Select the anisotropic filtering algorithm.
*/ - samp_st.AnisotropicAlgorithm = - samp_st.MinModeFilter == MAPFILTER_ANISOTROPIC ? - EWAApproximation : LEGACY; -#endif - -#if GFX_VER >= 6 - samp_st.NonnormalizedCoordinateEnable = target == GL_TEXTURE_RECTANGLE; -#endif - - const float hw_max_lod = GFX_VER >= 7 ? 14 : 13; - samp_st.MinLOD = CLAMP(sampler->Attrib.MinLod, 0, hw_max_lod); - samp_st.MaxLOD = CLAMP(sampler->Attrib.MaxLod, 0, hw_max_lod); - samp_st.TextureLODBias = - CLAMP(tex_unit_lod_bias + sampler->Attrib.LodBias, -16, 15); - -#if GFX_VER == 6 - samp_st.BaseMipLevel = - CLAMP(texObj->Attrib.MinLevel + texObj->Attrib.BaseLevel, 0, hw_max_lod); - samp_st.MinandMagStateNotEqual = - samp_st.MinModeFilter != samp_st.MagModeFilter; -#endif - - /* Upload the border color if necessary. If not, just point it at - * offset 0 (the start of the batch) - the color should be ignored, - * but that address won't fault in case something reads it anyway. - */ - uint32_t border_color_offset = 0; - if (wrap_mode_needs_border_color(wrap_s) || - wrap_mode_needs_border_color(wrap_t) || - wrap_mode_needs_border_color(wrap_r)) { - genX(upload_default_color)(brw, sampler, format, base_format, - texObj->_IsIntegerFormat, - texObj->StencilSampling, - &border_color_offset); - } -#if GFX_VER < 6 - samp_st.BorderColorPointer = - ro_bo(brw->batch.state.bo, border_color_offset); -#else - samp_st.BorderColorPointer = border_color_offset; -#endif - -#if GFX_VER >= 8 - samp_st.LODPreClampMode = CLAMP_MODE_OGL; -#else - samp_st.LODPreClampEnable = true; -#endif - - GENX(SAMPLER_STATE_pack)(brw, sampler_state, &samp_st); -} - -static void -update_sampler_state(struct brw_context *brw, - int unit, - uint32_t *sampler_state) -{ - struct gl_context *ctx = &brw->ctx; - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - const struct gl_texture_object *texObj = texUnit->_Current; - const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - /* These don't use samplers at all. */ - if (texObj->Target == GL_TEXTURE_BUFFER) - return; - - struct gl_texture_image *firstImage = texObj->Image[0][texObj->Attrib.BaseLevel]; - genX(update_sampler_state)(brw, texObj->Target, - ctx->Texture.CubeMapSeamless, - texUnit->LodBias, - firstImage->TexFormat, firstImage->_BaseFormat, - texObj, sampler, - sampler_state); -} - -static void -genX(upload_sampler_state_table)(struct brw_context *brw, - struct gl_program *prog, - struct brw_stage_state *stage_state) -{ - struct gl_context *ctx = &brw->ctx; - uint32_t sampler_count = stage_state->sampler_count; - - GLbitfield SamplersUsed = prog->SamplersUsed; - - if (sampler_count == 0) - return; - - /* SAMPLER_STATE is 4 DWords on all platforms. */ - const int dwords = GENX(SAMPLER_STATE_length); - const int size_in_bytes = dwords * sizeof(uint32_t); - - uint32_t *sampler_state = brw_state_batch(brw, - sampler_count * size_in_bytes, - 32, &stage_state->sampler_offset); - /* memset(sampler_state, 0, sampler_count * size_in_bytes); */ - - for (unsigned s = 0; s < sampler_count; s++) { - if (SamplersUsed & (1 << s)) { - const unsigned unit = prog->SamplerUnits[s]; - if (ctx->Texture.Unit[unit]._Current) { - update_sampler_state(brw, unit, sampler_state); - } - } - - sampler_state += dwords; - } - - if (GFX_VER >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) { - /* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */ - genX(emit_sampler_state_pointers_xs)(brw, stage_state); - } else { - /* Flag that the sampler state table pointer has changed; later atoms - * will handle it. 
- */ - brw->ctx.NewDriverState |= BRW_NEW_SAMPLER_STATE_TABLE; - } -} - -static void -genX(upload_fs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT]; - genX(upload_sampler_state_table)(brw, fs, &brw->wm.base); -} - -static const struct brw_tracked_state genX(fs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FRAGMENT_PROGRAM, - }, - .emit = genX(upload_fs_samplers), -}; - -static void -genX(upload_vs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX]; - genX(upload_sampler_state_table)(brw, vs, &brw->vs.base); -} - -static const struct brw_tracked_state genX(vs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTEX_PROGRAM, - }, - .emit = genX(upload_vs_samplers), -}; - -#if GFX_VER >= 6 -static void -genX(upload_gs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_GEOMETRY_PROGRAM */ - struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY]; - if (!gs) - return; - - genX(upload_sampler_state_table)(brw, gs, &brw->gs.base); -} - - -static const struct brw_tracked_state genX(gs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GEOMETRY_PROGRAM, - }, - .emit = genX(upload_gs_samplers), -}; -#endif - -#if GFX_VER >= 7 -static void -genX(upload_tcs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL]; - if (!tcs) - return; - - genX(upload_sampler_state_table)(brw, tcs, &brw->tcs.base); -} - -static const struct brw_tracked_state genX(tcs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = genX(upload_tcs_samplers), -}; -#endif - -#if GFX_VER >= 7 -static void -genX(upload_tes_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL]; - if (!tes) - return; - - genX(upload_sampler_state_table)(brw, tes, &brw->tes.base); -} - -static const struct brw_tracked_state genX(tes_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = genX(upload_tes_samplers), -}; -#endif - -#if GFX_VER >= 7 -static void -genX(upload_cs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE]; - if (!cs) - return; - - genX(upload_sampler_state_table)(brw, cs, &brw->cs.base); -} - -const struct brw_tracked_state genX(cs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_COMPUTE_PROGRAM, - }, - .emit = genX(upload_cs_samplers), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER <= 5 - -static void genX(upload_blend_constant_color)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) { - blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0]; - blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1]; - blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2]; - blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3]; - } -} - -static const struct brw_tracked_state 
genX(blend_constant_color) = { - .dirty = { - .mesa = _NEW_COLOR, - .brw = BRW_NEW_CONTEXT | - BRW_NEW_BLORP, - }, - .emit = genX(upload_blend_constant_color) -}; -#endif - -/* ---------------------------------------------------------------------- */ - -void -genX(init_atoms)(struct brw_context *brw) -{ -#if GFX_VER < 6 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - /* Once all the programs are done, we know how large urb entry - * sizes need to be and can decide if we need to change the urb - * layout. - */ - &brw_curbe_offsets, - &brw_recalculate_urb_fence, - - &genX(cc_vp), - &genX(color_calc_state), - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. - */ - &brw_vs_pull_constants, - &brw_wm_pull_constants, - &brw_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - &brw_vs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - - /* These set up state for brw_psp_urb_cbs */ - &genX(wm_state), - &genX(sf_clip_viewport), - &genX(sf_state), - &genX(vs_state), /* always required, enabled or not */ - &genX(clip_state), - &genX(gs_state), - - /* Command packets: - */ - &brw_binding_table_pointers, - &genX(blend_constant_color), - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &brw_psp_urb_cbs, - - &genX(drawing_rect), - &brw_indices, /* must come before brw_vertices */ - &genX(index_buffer), - &genX(vertices), - - &brw_constant_buffer - }; -#elif GFX_VER == 6 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - &genX(sf_clip_viewport), - - /* Command packets: */ - - &genX(cc_vp), - - &gfx6_urb, - &genX(blend_state), /* must do before cc unit */ - &genX(color_calc_state), /* must do before cc unit */ - &genX(depth_stencil_state), /* must do before cc unit */ - - &genX(vs_push_constants), /* Before vs_state */ - &genX(gs_push_constants), /* Before gs_state */ - &genX(wm_push_constants), /* Before wm_state */ - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. 
- */ - &brw_vs_pull_constants, - &brw_vs_ubo_surfaces, - &brw_gs_pull_constants, - &brw_gs_ubo_surfaces, - &brw_wm_pull_constants, - &brw_wm_ubo_surfaces, - &gfx6_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - &gfx6_sol_surface, - &brw_vs_binding_table, - &gfx6_gs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - &genX(gs_samplers), - &gfx6_sampler_state, - &genX(multisample_state), - - &genX(vs_state), - &genX(gs_state), - &genX(clip_state), - &genX(sf_state), - &genX(wm_state), - - &genX(scissor_state), - - &gfx6_binding_table_pointers, - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &genX(drawing_rect), - - &brw_indices, /* must come before brw_vertices */ - &genX(index_buffer), - &genX(vertices), - }; -#elif GFX_VER == 7 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - /* Command packets: */ - - &genX(cc_vp), - &genX(sf_clip_viewport), - - &gfx7_l3_state, - &gfx7_push_constant_space, - &gfx7_urb, -#if GFX_VERx10 == 75 - &genX(cc_and_blend_state), -#else - &genX(blend_state), /* must do before cc unit */ - &genX(color_calc_state), /* must do before cc unit */ -#endif - &genX(depth_stencil_state), /* must do before cc unit */ - - &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ - &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */ - &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */ - &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ - &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ - - &genX(vs_push_constants), /* Before vs_state */ - &genX(tcs_push_constants), - &genX(tes_push_constants), - &genX(gs_push_constants), /* Before gs_state */ - &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */ - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. 
- */ - &brw_vs_pull_constants, - &brw_vs_ubo_surfaces, - &brw_tcs_pull_constants, - &brw_tcs_ubo_surfaces, - &brw_tes_pull_constants, - &brw_tes_ubo_surfaces, - &brw_gs_pull_constants, - &brw_gs_ubo_surfaces, - &brw_wm_pull_constants, - &brw_wm_ubo_surfaces, - &gfx6_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - - &genX(push_constant_packets), - - &brw_vs_binding_table, - &brw_tcs_binding_table, - &brw_tes_binding_table, - &brw_gs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - &genX(tcs_samplers), - &genX(tes_samplers), - &genX(gs_samplers), - &genX(multisample_state), - - &genX(vs_state), - &genX(hs_state), - &genX(te_state), - &genX(ds_state), - &genX(gs_state), - &genX(sol_state), - &genX(clip_state), - &genX(sbe_state), - &genX(sf_state), - &genX(wm_state), - &genX(ps_state), - - &genX(scissor_state), - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &genX(drawing_rect), - - &brw_indices, /* must come before brw_vertices */ - &genX(index_buffer), - &genX(vertices), - -#if GFX_VERx10 == 75 - &genX(cut_index), -#endif - }; -#elif GFX_VER >= 8 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - &genX(cc_vp), - &genX(sf_clip_viewport), - - &gfx7_l3_state, - &gfx7_push_constant_space, - &gfx7_urb, - &genX(blend_state), - &genX(color_calc_state), - - &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ - &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */ - &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */ - &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ - &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ - - &genX(vs_push_constants), /* Before vs_state */ - &genX(tcs_push_constants), - &genX(tes_push_constants), - &genX(gs_push_constants), /* Before gs_state */ - &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */ - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. 
- */ - &brw_vs_pull_constants, - &brw_vs_ubo_surfaces, - &brw_tcs_pull_constants, - &brw_tcs_ubo_surfaces, - &brw_tes_pull_constants, - &brw_tes_ubo_surfaces, - &brw_gs_pull_constants, - &brw_gs_ubo_surfaces, - &brw_wm_pull_constants, - &brw_wm_ubo_surfaces, - &gfx6_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - - &genX(push_constant_packets), - - &brw_vs_binding_table, - &brw_tcs_binding_table, - &brw_tes_binding_table, - &brw_gs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - &genX(tcs_samplers), - &genX(tes_samplers), - &genX(gs_samplers), - &genX(multisample_state), - - &genX(vs_state), - &genX(hs_state), - &genX(te_state), - &genX(ds_state), - &genX(gs_state), - &genX(sol_state), - &genX(clip_state), - &genX(raster_state), - &genX(sbe_state), - &genX(sf_state), - &genX(ps_blend), - &genX(ps_extra), - &genX(ps_state), - &genX(depth_stencil_state), - &genX(wm_state), - - &genX(scissor_state), - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &genX(drawing_rect), - - &genX(vf_topology), - - &brw_indices, - &genX(index_buffer), - &genX(vertices), - - &genX(cut_index), - &gfx8_pma_fix, - }; -#endif - - STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms)); - brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE, - render_atoms, ARRAY_SIZE(render_atoms)); - -#if GFX_VER >= 7 - static const struct brw_tracked_state *compute_atoms[] = - { - &gfx7_l3_state, - &brw_cs_image_surfaces, - &genX(cs_push_constants), - &genX(cs_pull_constants), - &brw_cs_ubo_surfaces, - &brw_cs_texture_surfaces, - &brw_cs_work_groups_surface, - &genX(cs_samplers), - &genX(cs_state), - }; - - STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms)); - brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE, - compute_atoms, ARRAY_SIZE(compute_atoms)); - - brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count); - brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker); -#endif - - brw->vtbl.emit_state_base_address = genX(emit_state_base_address); - - assert(brw->screen->devinfo.verx10 == GFX_VERx10); -} diff --git a/src/mesa/drivers/dri/i965/gfx4_blorp_exec.h b/src/mesa/drivers/dri/i965/gfx4_blorp_exec.h deleted file mode 100644 index 62a008e..0000000 --- a/src/mesa/drivers/dri/i965/gfx4_blorp_exec.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
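The atom arrays above feed a dirty-bit dispatch loop: each tracked-state atom names the _NEW_* (core Mesa) and BRW_NEW_* (driver) bits it depends on, and its emit hook runs only when one of those bits is set. A toy model of that scheme, assuming single-bit flags (not the driver's actual types or flag values):

    #include <stdint.h>
    #include <stdio.h>

    struct tracked_state {
       uint64_t mesa_bits;   /* _NEW_* flags this atom watches */
       uint64_t brw_bits;    /* BRW_NEW_* flags this atom watches */
       void (*emit)(void);   /* re-emits the corresponding hardware state */
    };

    static void emit_samplers(void) { puts("re-emit sampler state"); }

    static const struct tracked_state atoms[] = {
       { 1u << 0 /* "texture" */, 1u << 1 /* "new batch" */, emit_samplers },
    };

    static void run_atoms(uint64_t mesa_dirty, uint64_t brw_dirty)
    {
       for (unsigned i = 0; i < sizeof(atoms) / sizeof(atoms[0]); i++)
          if ((atoms[i].mesa_bits & mesa_dirty) || (atoms[i].brw_bits & brw_dirty))
             atoms[i].emit();
    }

    int main(void)
    {
       run_atoms(1u << 0, 0);   /* texture state changed -> samplers re-emitted */
       return 0;
    }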
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static inline struct blorp_address -dynamic_state_address(struct blorp_batch *batch, uint32_t offset) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return (struct blorp_address) { - .buffer = brw->batch.state.bo, - .offset = offset, - }; -} - -static inline struct blorp_address -instruction_state_address(struct blorp_batch *batch, uint32_t offset) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return (struct blorp_address) { - .buffer = brw->cache.bo, - .offset = offset, - }; -} - -static struct blorp_address -blorp_emit_vs_state(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(VS_STATE), vs, 64, &offset) { - vs.Enable = false; - vs.URBEntryAllocationSize = brw->urb.vsize - 1; -#if GFX_VER == 5 - vs.NumberofURBEntries = brw->urb.nr_vs_entries >> 2; -#else - vs.NumberofURBEntries = brw->urb.nr_vs_entries; -#endif - } - - return dynamic_state_address(batch, offset); -} - -static struct blorp_address -blorp_emit_sf_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - const struct brw_sf_prog_data *prog_data = params->sf_prog_data; - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(SF_STATE), sf, 64, &offset) { -#if GFX_VER == 4 - sf.KernelStartPointer = - instruction_state_address(batch, params->sf_prog_kernel); -#else - sf.KernelStartPointer = params->sf_prog_kernel; -#endif - sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1; - sf.VertexURBEntryReadLength = prog_data->urb_read_length; - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; - sf.DispatchGRFStartRegisterForURBData = 3; - - sf.URBEntryAllocationSize = brw->urb.sfsize - 1; - sf.NumberofURBEntries = brw->urb.nr_sf_entries; - -#if GFX_VER == 5 - sf.MaximumNumberofThreads = MIN2(48, brw->urb.nr_sf_entries) - 1; -#else - sf.MaximumNumberofThreads = MIN2(24, brw->urb.nr_sf_entries) - 1; -#endif - - sf.ViewportTransformEnable = false; - - sf.CullMode = CULLMODE_NONE; - } - - return dynamic_state_address(batch, offset); -} - -static struct blorp_address -blorp_emit_wm_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - const struct brw_wm_prog_data *prog_data = params->wm_prog_data; - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(WM_STATE), wm, 64, &offset) { - if (params->src.enabled) { - /* Iron Lake can't do sampler prefetch */ - wm.SamplerCount = (GFX_VER != 5); - wm.BindingTableEntryCount = 2; - uint32_t sampler = blorp_emit_sampler_state(batch); - wm.SamplerStatePointer = dynamic_state_address(batch, sampler); - } - - if (prog_data) { - wm.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->base.dispatch_grf_start_reg; - wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2; - wm.SetupURBEntryReadOffset = 0; - - wm.DepthCoefficientURBReadOffset = 1; - wm.PixelShaderKillsPixel = prog_data->uses_kill; - wm.ThreadDispatchEnable = true; - wm.EarlyDepthTestEnable = true; - - 
wm._8PixelDispatchEnable = prog_data->dispatch_8; - wm._16PixelDispatchEnable = prog_data->dispatch_16; - wm._32PixelDispatchEnable = prog_data->dispatch_32; - -#if GFX_VER == 4 - wm.KernelStartPointer0 = - instruction_state_address(batch, params->wm_prog_kernel); - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0); -#else - wm.KernelStartPointer0 = params->wm_prog_kernel + - brw_wm_prog_data_prog_offset(prog_data, wm, 0); - wm.KernelStartPointer1 = params->wm_prog_kernel + - brw_wm_prog_data_prog_offset(prog_data, wm, 1); - wm.KernelStartPointer2 = params->wm_prog_kernel + - brw_wm_prog_data_prog_offset(prog_data, wm, 2); - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0); - wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1); - wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2); -#endif - } - - wm.MaximumNumberofThreads = - batch->blorp->compiler->devinfo->max_wm_threads - 1; - } - - return dynamic_state_address(batch, offset); -} - -static struct blorp_address -blorp_emit_color_calc_state(struct blorp_batch *batch) -{ - uint32_t cc_viewport = blorp_emit_cc_viewport(batch); - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) { - cc.CCViewportStatePointer = dynamic_state_address(batch, cc_viewport); - } - - return dynamic_state_address(batch, offset); -} - -static void -blorp_emit_pipeline(struct blorp_batch *batch, - const struct blorp_params *params) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - emit_urb_config(batch, params, NULL); - - blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) { - pp.PointertoVSState = blorp_emit_vs_state(batch); - pp.GSEnable = false; - pp.ClipEnable = false; - pp.PointertoSFState = blorp_emit_sf_state(batch, params); - pp.PointertoWMState = blorp_emit_wm_state(batch, params); - pp.PointertoColorCalcState = blorp_emit_color_calc_state(batch); - } - - brw_upload_urb_fence(brw); - - blorp_emit(batch, GENX(CS_URB_STATE), curb); - blorp_emit(batch, GENX(CONSTANT_BUFFER), curb); -} diff --git a/src/mesa/drivers/dri/i965/gfx6_clip_state.c b/src/mesa/drivers/dri/i965/gfx6_clip_state.c deleted file mode 100644 index 8e3fae7..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_clip_state.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
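The dynamic_state_address/instruction_state_address helpers above express the gfx4/5 indirection model: unit state (VS, SF, WM, color calc) is packed into a state buffer and referenced by 3DSTATE_PIPELINED_POINTERS as (buffer, offset) pairs rather than being embedded in the command stream. A toy version of that base-plus-offset resolution, with plain host memory standing in for a BO:

    #include <assert.h>
    #include <stdint.h>

    struct address { const uint8_t *buffer; uint32_t offset; };

    /* Resolve a (buffer, offset) state reference the way the hardware would. */
    static const uint8_t *resolve(struct address a) { return a.buffer + a.offset; }

    int main(void)
    {
       uint8_t state_bo[256] = {0};     /* stand-in for brw->batch.state.bo */
       state_bo[64] = 0xab;             /* packed unit state placed at offset 64 */
       struct address vs_state = { state_bo, 64 };
       assert(*resolve(vs_state) == 0xab);
       return 0;
    }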
- * - * Authors: - * Eric Anholt - * - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" -#include "brw_util.h" -#include "brw_batch.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" - -bool -brw_is_drawing_points(const struct brw_context *brw) -{ - /* Determine if the primitives *reaching the SF* are points */ - /* _NEW_POLYGON */ - if (brw->ctx.Polygon.FrontMode == GL_POINT || - brw->ctx.Polygon.BackMode == GL_POINT) { - return true; - } - - if (brw->gs.base.prog_data) { - /* BRW_NEW_GS_PROG_DATA */ - return brw_gs_prog_data(brw->gs.base.prog_data)->output_topology == - _3DPRIM_POINTLIST; - } else if (brw->tes.base.prog_data) { - /* BRW_NEW_TES_PROG_DATA */ - return brw_tes_prog_data(brw->tes.base.prog_data)->output_topology == - BRW_TESS_OUTPUT_TOPOLOGY_POINT; - } else { - /* BRW_NEW_PRIMITIVE */ - return brw->primitive == _3DPRIM_POINTLIST; - } -} - -bool -brw_is_drawing_lines(const struct brw_context *brw) -{ - /* Determine if the primitives *reaching the SF* are points */ - /* _NEW_POLYGON */ - if (brw->ctx.Polygon.FrontMode == GL_LINE || - brw->ctx.Polygon.BackMode == GL_LINE) { - return true; - } - - if (brw->gs.base.prog_data) { - /* BRW_NEW_GS_PROG_DATA */ - return brw_gs_prog_data(brw->gs.base.prog_data)->output_topology == - _3DPRIM_LINESTRIP; - } else if (brw->tes.base.prog_data) { - /* BRW_NEW_TES_PROG_DATA */ - return brw_tes_prog_data(brw->tes.base.prog_data)->output_topology == - BRW_TESS_OUTPUT_TOPOLOGY_LINE; - } else { - /* BRW_NEW_PRIMITIVE */ - switch (brw->primitive) { - case _3DPRIM_LINELIST: - case _3DPRIM_LINESTRIP: - case _3DPRIM_LINELOOP: - return true; - } - } - return false; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_constant_state.c b/src/mesa/drivers/dri/i965/gfx6_constant_state.c deleted file mode 100644 index 1f0e9fb..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_constant_state.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_cs.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_program.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -static uint32_t -f_as_u32(float f) -{ - union fi fi = { .f = f }; - return fi.ui; -} - -static uint32_t -brw_param_value(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_state *stage_state, - uint32_t param) -{ - struct gl_context *ctx = &brw->ctx; - - switch (BRW_PARAM_DOMAIN(param)) { - case BRW_PARAM_DOMAIN_BUILTIN: - if (param == BRW_PARAM_BUILTIN_ZERO) { - return 0; - } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) { - gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); - unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param); - unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param); - return ((uint32_t *)clip_planes[idx])[comp]; - } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X && - param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) { - unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; - return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]); - } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) { - return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]); - } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) { - return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]); - } else if (param >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X && - param <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) { - unsigned i = param - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X; - return brw->compute.group_size[i]; - } else { - unreachable("Invalid param builtin"); - } - - case BRW_PARAM_DOMAIN_PARAMETER: { - unsigned idx = BRW_PARAM_PARAMETER_IDX(param); - unsigned offset = prog->Parameters->Parameters[idx].ValueOffset; - unsigned comp = BRW_PARAM_PARAMETER_COMP(param); - assert(idx < prog->Parameters->NumParameters); - return prog->Parameters->ParameterValues[offset + comp].u; - } - - case BRW_PARAM_DOMAIN_UNIFORM: { - unsigned idx = BRW_PARAM_UNIFORM_IDX(param); - assert(idx < prog->sh.data->NumUniformDataSlots); - return prog->sh.data->UniformDataSlots[idx].u; - } - - case BRW_PARAM_DOMAIN_IMAGE: { - unsigned idx = BRW_PARAM_IMAGE_IDX(param); - unsigned offset = BRW_PARAM_IMAGE_OFFSET(param); - assert(offset < ARRAY_SIZE(stage_state->image_param)); - return ((uint32_t *)&stage_state->image_param[idx])[offset]; - } - - default: - unreachable("Invalid param domain"); - } -} - - -void -brw_populate_constant_data(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_state *stage_state, - void *void_dst, - const uint32_t *param, - unsigned nr_params) -{ - uint32_t *dst = void_dst; - for (unsigned i = 0; i < nr_params; i++) - dst[i] = brw_param_value(brw, prog, stage_state, param[i]); -} - - -/** - * Creates a streamed BO containing the push constants for the VS or GS on - * gfx6+. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. - * - * Not all GLSL uniforms will be uploaded as push constants: The hardware has - * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be - * uploaded as push constants, while GL 4.4 requires at least 1024 components - * to be usable for the VS. Plus, currently we always use pull constants - * instead of push constants when doing variable-index array access. 
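brw_param_value above decodes a tagged 32-bit handle: a domain tag selects builtin, parameter, uniform, or image storage, and the remaining bits index into it. The sketch below uses a hypothetical bit layout purely for illustration; the real BRW_PARAM_* encoding lives in the driver's headers and differs in detail:

    #include <assert.h>
    #include <stdint.h>

    enum param_domain { DOMAIN_BUILTIN, DOMAIN_PARAMETER, DOMAIN_UNIFORM, DOMAIN_IMAGE };

    /* Hypothetical packing: domain in the top byte, index and component below. */
    #define MAKE_PARAM(d, idx, comp) (((uint32_t)(d) << 24) | ((idx) << 2) | (comp))
    #define PARAM_DOMAIN(p)          ((enum param_domain)((p) >> 24))
    #define PARAM_IDX(p)             (((p) >> 2) & 0x3fffff)
    #define PARAM_COMP(p)            ((p) & 0x3)

    int main(void)
    {
       uint32_t p = MAKE_PARAM(DOMAIN_PARAMETER, 5, 2);  /* parameter 5, .z */
       assert(PARAM_DOMAIN(p) == DOMAIN_PARAMETER);
       assert(PARAM_IDX(p) == 5 && PARAM_COMP(p) == 2);
       return 0;
    }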
- * - * See brw_curbe.c for the equivalent gfx4/5 code. - */ -void -gfx6_upload_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - bool active = prog_data && - (stage_state->stage != MESA_SHADER_TESS_CTRL || - brw->programs[MESA_SHADER_TESS_EVAL]); - - if (active) - _mesa_shader_write_subroutine_indices(ctx, stage_state->stage); - - if (!active || prog_data->nr_params == 0) { - stage_state->push_const_size = 0; - } else { - /* Updates the ParameterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set? */ - if (prog) - _mesa_load_state_parameters(ctx, prog->Parameters); - - int i; - const int size = prog_data->nr_params * sizeof(gl_constant_value); - gl_constant_value *param; - if (devinfo->verx10 >= 75) { - param = brw_upload_space(&brw->upload, size, 32, - &stage_state->push_const_bo, - &stage_state->push_const_offset); - } else { - param = brw_state_batch(brw, size, 32, - &stage_state->push_const_offset); - } - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - /* _NEW_PROGRAM_CONSTANTS - * - * Also _NEW_TRANSFORM -- we may reference clip planes other than as a - * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS - * wouldn't be set for them. - */ - brw_populate_constant_data(brw, prog, stage_state, param, - prog_data->param, - prog_data->nr_params); - - if (0) { - fprintf(stderr, "%s constants:\n", - _mesa_shader_stage_to_string(stage_state->stage)); - for (i = 0; i < prog_data->nr_params; i++) { - if ((i & 7) == 0) - fprintf(stderr, "g%d: ", - prog_data->dispatch_grf_start_reg + i / 8); - fprintf(stderr, "%8f ", param[i].f); - if ((i & 7) == 7) - fprintf(stderr, "\n"); - } - if ((i & 7) != 0) - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - } - - stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; - /* We can only push 32 registers of constants at a time. */ - - /* From the SNB PRM (vol2, part 1, section 3.2.1.4): 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to - * 32" - * - * From the IVB PRM (vol2, part 1, section 3.2.1.3): 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields must be less than or - * equal to the size of 64" - * - * The other shader stages all match the VS's limits. - */ - assert(stage_state->push_const_size <= 32); - } - - stage_state->push_constants_dirty = true; -} - - -/** - * Creates a temporary BO containing the pull constant data for the shader - * stage, and the SURFACE_STATE struct that points at it. - * - * Pull constants are GLSL uniforms (and other constant data) beyond what we - * could fit as push constants, or that have variable-index array access - * (which is easiest to support using pull constants, and avoids filling - * register space with mostly-unused data). - * - * Compare this path to brw_curbe.c for gfx4/5 push constants, and - * gfx6_vs_state.c for gfx6+ push constants.
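A worked example of the sizing above: push_const_size counts 256-bit registers holding eight 32-bit params each, so the register count is ALIGN(nr_params, 8) / 8, capped at 32 by the PRM rule quoted in the comment:

    #include <assert.h>

    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    /* Registers needed to push nr_params 32-bit values, 8 per register. */
    static int push_const_regs(int nr_params)
    {
       return ALIGN(nr_params, 8) / 8;
    }

    int main(void)
    {
       assert(push_const_regs(20) == 3);    /* 20 floats round up to 3 registers */
       assert(push_const_regs(256) == 32);  /* 256 floats is exactly the limit */
       return 0;
    }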
- */ -void -brw_upload_pull_constants(struct brw_context *brw, - GLbitfield64 brw_new_constbuf, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - const struct brw_stage_prog_data *prog_data) -{ - unsigned i; - uint32_t surf_index = prog_data->binding_table.pull_constants_start; - - if (!prog_data->nr_pull_params) { - if (stage_state->surf_offset[surf_index]) { - stage_state->surf_offset[surf_index] = 0; - brw->ctx.NewDriverState |= brw_new_constbuf; - } - return; - } - - /* Updates the ParameterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(&brw->ctx, prog->Parameters); - - /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */ - uint32_t size = prog_data->nr_pull_params * 4; - struct brw_bo *const_bo = NULL; - uint32_t const_offset; - gl_constant_value *constants = brw_upload_space(&brw->upload, size, 64, - &const_bo, &const_offset); - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - brw_populate_constant_data(brw, prog, stage_state, constants, - prog_data->pull_param, - prog_data->nr_pull_params); - - if (0) { - for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) { - const gl_constant_value *row = &constants[i * 4]; - fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n", - i, row[0].f, row[1].f, row[2].f, row[3].f); - } - } - - brw_emit_buffer_surface_state(brw, &stage_state->surf_offset[surf_index], - const_bo, const_offset, - ISL_FORMAT_R32G32B32A32_FLOAT, - size, 1, 0); - - brw_bo_unreference(const_bo); - - brw->ctx.NewDriverState |= brw_new_constbuf; -} - -/** - * Creates a region containing the push constants for the CS on gfx7+. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. - * - * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the - * equivalent gfx4/5 code and gfx6_vs_state.c:gfx6_upload_push_constants for - * gfx6+. - */ -void -brw_upload_cs_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_cs_prog_data *cs_prog_data, - struct brw_stage_state *stage_state) -{ - struct gl_context *ctx = &brw->ctx; - const struct brw_stage_prog_data *prog_data = - (struct brw_stage_prog_data*) cs_prog_data; - - /* Updates the ParameterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set?
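On the sizing above: the pull buffer holds nr_pull_params 32-bit values (size = nr_pull_params * 4 bytes) and is exposed as an R32G32B32A32_FLOAT buffer surface, so each 16-byte texel covers one vec4 of constants. A quick check of that arithmetic with an illustrative count:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       unsigned nr_pull_params = 18;          /* e.g. 18 dwords of constants */
       uint32_t size = nr_pull_params * 4;    /* buffer size in bytes, as above */
       unsigned full_vec4s = size / 16;       /* complete RGBA32F texels */
       assert(size == 72 && full_vec4s == 4); /* 4 full vec4s + 2 trailing dwords */
       return 0;
    }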
*/ - _mesa_load_state_parameters(ctx, prog->Parameters); - - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(&brw->screen->devinfo, cs_prog_data, - brw->compute.group_size); - const unsigned push_const_size = - brw_cs_push_const_total_size(cs_prog_data, dispatch.threads); - - if (push_const_size == 0) { - stage_state->push_const_size = 0; - return; - } - - - uint32_t *param = - brw_state_batch(brw, ALIGN(push_const_size, 64), - 64, &stage_state->push_const_offset); - assert(param); - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - if (cs_prog_data->push.cross_thread.size > 0) { - uint32_t *param_copy = param; - for (unsigned i = 0; - i < cs_prog_data->push.cross_thread.dwords; - i++) { - assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID); - param_copy[i] = brw_param_value(brw, prog, stage_state, - prog_data->param[i]); - } - } - - if (cs_prog_data->push.per_thread.size > 0) { - for (unsigned t = 0; t < dispatch.threads; t++) { - unsigned dst = - 8 * (cs_prog_data->push.per_thread.regs * t + - cs_prog_data->push.cross_thread.regs); - unsigned src = cs_prog_data->push.cross_thread.dwords; - for ( ; src < prog_data->nr_params; src++, dst++) { - if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) { - param[dst] = t; - } else { - param[dst] = brw_param_value(brw, prog, stage_state, - prog_data->param[src]); - } - } - } - } - - stage_state->push_const_size = - cs_prog_data->push.cross_thread.regs + - cs_prog_data->push.per_thread.regs; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_multisample_state.c b/src/mesa/drivers/dri/i965/gfx6_multisample_state.c deleted file mode 100644 index b5f2c3b..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_multisample_state.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
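A worked example of the layout built above: the cross-thread block comes first and is shared, then each thread gets its own per-thread block, so thread t's data starts at dword 8 * (per_thread.regs * t + cross_thread.regs), and its copy of BRW_PARAM_BUILTIN_SUBGROUP_ID is patched to t. Register counts below are illustrative:

    #include <assert.h>

    /* Dword index where thread `t`'s per-thread block begins, mirroring the
     * dst computation in brw_upload_cs_push_constants above. */
    static unsigned per_thread_start(unsigned cross_regs, unsigned per_regs,
                                     unsigned t)
    {
       return 8 * (per_regs * t + cross_regs);
    }

    int main(void)
    {
       /* 2 shared cross-thread registers, then 1 register per thread. */
       assert(per_thread_start(2, 1, 0) == 16);  /* right after the shared block */
       assert(per_thread_start(2, 1, 3) == 40);  /* 8 * (1*3 + 2) */
       return 0;
    }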
- */ - -#include "brw_batch.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_multisample_state.h" -#include "main/framebuffer.h" - -void -gfx6_get_sample_position(struct gl_context *ctx, - struct gl_framebuffer *fb, - GLuint index, GLfloat *result) -{ - uint8_t bits; - - switch (_mesa_geometric_samples(fb)) { - case 1: - result[0] = result[1] = 0.5f; - return; - case 2: - bits = brw_multisample_positions_1x_2x >> (8 * index); - break; - case 4: - bits = brw_multisample_positions_4x >> (8 * index); - break; - case 8: - bits = brw_multisample_positions_8x[index >> 2] >> (8 * (index & 3)); - break; - case 16: - bits = brw_multisample_positions_16x[index >> 2] >> (8 * (index & 3)); - break; - default: - unreachable("Not implemented"); - } - - /* Convert from U0.4 back to a floating point coordinate. */ - result[0] = ((bits >> 4) & 0xf) / 16.0f; - result[1] = (bits & 0xf) / 16.0f; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_queryobj.c b/src/mesa/drivers/dri/i965/gfx6_queryobj.c deleted file mode 100644 index 85db77f..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_queryobj.c +++ /dev/null @@ -1,560 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * Kenneth Graunke - */ - -/** @file gfx6_queryobj.c - * - * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query, - * GL_EXT_transform_feedback, and friends) on platforms that support - * hardware contexts (Gfx6+). - */ -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "perf/intel_perf_regs.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" - -static inline void -set_query_availability(struct brw_context *brw, struct brw_query_object *query, - bool available) -{ - /* For platforms that support ARB_query_buffer_object, we write the - * query availability for "pipelined" queries. - * - * Most counter snapshots are written by the command streamer, by - * doing a CS stall and then MI_STORE_REGISTER_MEM. For these - * counters, the CS stall guarantees that the results will be - * available when subsequent CS commands run. So we don't need to - * do any additional tracking. - * - * Other counters (occlusion queries and timestamp) are written by - * PIPE_CONTROL, without a CS stall. This means that we can't be - * sure whether the writes have landed yet or not. 
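Decoding one of the packed position bytes above by hand: x lives in the high nibble and y in the low nibble, each as U0.4 fixed point (sixteenths of a pixel). The byte value here is made up for illustration, not taken from the real position tables:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       uint8_t bits = 0xC4;                    /* example byte: x = 12, y = 4 */
       float x = ((bits >> 4) & 0xf) / 16.0f;  /* high nibble -> 12/16 = 0.75 */
       float y = (bits & 0xf) / 16.0f;         /* low nibble  ->  4/16 = 0.25 */
       assert(x == 0.75f && y == 0.25f);
       return 0;
    }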
Performing a - * PIPE_CONTROL with an immediate write will synchronize with - * those earlier writes, so we write 1 when the value has landed. - */ - if (brw->ctx.Extensions.ARB_query_buffer_object && - brw_is_query_pipelined(query)) { - unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE; - - if (available) { - /* Order available *after* the query results. */ - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } else { - /* Make it unavailable *before* any pipelined reads. */ - flags |= PIPE_CONTROL_CS_STALL; - } - - brw_emit_pipe_control_write(brw, flags, - query->bo, 2 * sizeof(uint64_t), - available); - } -} - -static void -write_primitives_generated(struct brw_context *brw, - struct brw_bo *query_bo, int stream, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_emit_mi_flush(brw); - - if (devinfo->ver >= 7 && stream > 0) { - brw_store_register_mem64(brw, query_bo, - GFX7_SO_PRIM_STORAGE_NEEDED(stream), - idx * sizeof(uint64_t)); - } else { - brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, - idx * sizeof(uint64_t)); - } -} - -static void -write_xfb_primitives_written(struct brw_context *brw, - struct brw_bo *bo, int stream, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_emit_mi_flush(brw); - - if (devinfo->ver >= 7) { - brw_store_register_mem64(brw, bo, GFX7_SO_NUM_PRIMS_WRITTEN(stream), - idx * sizeof(uint64_t)); - } else { - brw_store_register_mem64(brw, bo, GFX6_SO_NUM_PRIMS_WRITTEN, - idx * sizeof(uint64_t)); - } -} - -static void -write_xfb_overflow_streams(struct gl_context *ctx, - struct brw_bo *bo, int stream, int count, - int idx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_emit_mi_flush(brw); - - for (int i = 0; i < count; i++) { - int w_idx = 4 * i + idx; - int g_idx = 4 * i + idx + 2; - - if (devinfo->ver >= 7) { - brw_store_register_mem64(brw, bo, - GFX7_SO_NUM_PRIMS_WRITTEN(stream + i), - g_idx * sizeof(uint64_t)); - brw_store_register_mem64(brw, bo, - GFX7_SO_PRIM_STORAGE_NEEDED(stream + i), - w_idx * sizeof(uint64_t)); - } else { - brw_store_register_mem64(brw, bo, - GFX6_SO_NUM_PRIMS_WRITTEN, - g_idx * sizeof(uint64_t)); - brw_store_register_mem64(brw, bo, - GFX6_SO_PRIM_STORAGE_NEEDED, - w_idx * sizeof(uint64_t)); - } - } -} - -static bool -check_xfb_overflow_streams(uint64_t *results, int count) -{ - bool overflow = false; - - for (int i = 0; i < count; i++) { - uint64_t *result_i = &results[4 * i]; - - if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) { - overflow = true; - break; - } - } - - return overflow; -} - -static inline int -pipeline_target_to_index(int target) -{ - if (target == GL_GEOMETRY_SHADER_INVOCATIONS) - return MAX_PIPELINE_STATISTICS - 1; - else - return target - GL_VERTICES_SUBMITTED_ARB; -} - -static void -emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo, - int stream, int target, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* One source of confusion is the tessellation shader statistics. The - * hardware has no statistics specific to the TE unit. Ideally we could have - * the HS primitives for TESS_CONTROL_SHADER_PATCHES_ARB, and the DS - * invocations as the register for TESS_EVALUATION_SHADER_INVOCATIONS_ARB. - * Unfortunately we don't have HS primitives, we only have HS invocations.
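The BO layout implied above: a begin snapshot at index 0, an end snapshot at index 1, and the ARB_query_buffer_object availability word at byte offset 2 * sizeof(uint64_t). Reading it back reduces to an end-minus-start delta; a sketch over plain memory rather than a mapped BO:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       uint64_t bo[3] = { 100, 142, 1 };  /* begin, end, availability word */
       uint64_t result = bo[1] - bo[0];   /* counter queries are deltas */
       int available = bo[2] != 0;        /* written at 2 * sizeof(uint64_t) */
       assert(result == 42 && available);
       return 0;
    }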
- */ - - /* Everything except GEOMETRY_SHADER_INVOCATIONS can be kept in a simple - * lookup table - */ - static const uint32_t target_to_register[] = { - IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ - IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ - VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ - HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */ - DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */ - GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ - PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ - CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */ - CL_INVOCATION_COUNT, /* CLIPPING_INPUT_PRIMITIVES */ - CL_PRIMITIVES_COUNT, /* CLIPPING_OUTPUT_PRIMITIVES */ - GS_INVOCATION_COUNT /* This one is special... */ - }; - STATIC_ASSERT(ARRAY_SIZE(target_to_register) == MAX_PIPELINE_STATISTICS); - uint32_t reg = target_to_register[pipeline_target_to_index(target)]; - /* Gfx6 GS code counts full primitives, that is, it won't count individual - * triangles in a triangle strip. Use CL_INVOCATION_COUNT for that. - */ - if (devinfo->ver == 6 && target == GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB) - reg = CL_INVOCATION_COUNT; - assert(reg != 0); - - /* Emit a flush to make sure various parts of the pipeline are complete and - * we get an accurate value - */ - brw_emit_mi_flush(brw); - - brw_store_register_mem64(brw, bo, reg, idx * sizeof(uint64_t)); -} - - -/** - * Wait on the query object's BO and calculate the final result. - */ -static void -gfx6_queryobj_get_results(struct gl_context *ctx, - struct brw_query_object *query) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (query->bo == NULL) - return; - - uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ); - switch (query->Base.Target) { - case GL_TIME_ELAPSED: - /* The query BO contains the starting and ending timestamps. - * Subtract the two and convert to nanoseconds. - */ - query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]); - query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result); - break; - - case GL_TIMESTAMP: - /* The query BO contains a single timestamp value in results[0]. */ - query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]); - - /* Ensure the scaled timestamp overflows according to - * GL_QUERY_COUNTER_BITS - */ - query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; - break; - - case GL_SAMPLES_PASSED_ARB: - /* We need to use += rather than = here since some BLT-based operations - * may have added additional samples to our occlusion query value. 
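The GL_TIMESTAMP masking above makes the scaled value wrap at the advertised GL_QUERY_COUNTER_BITS width. The mask arithmetic in isolation, with an assumed (not necessarily the driver's) 36-bit counter width:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       unsigned timestamp_bits = 36;                 /* assumed counter width */
       uint64_t mask = (1ull << timestamp_bits) - 1;
       uint64_t t = (1ull << 36) + 5;                /* value past the width */
       assert((t & mask) == 5);                      /* wraps as GL requires */
       return 0;
    }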
- */ - query->Base.Result += results[1] - results[0]; - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - if (results[0] != results[1]) - query->Base.Result = true; - break; - - case GL_PRIMITIVES_GENERATED: - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - query->Base.Result = results[1] - results[0]; - break; - - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - query->Base.Result = check_xfb_overflow_streams(results, 1); - break; - - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - query->Base.Result = check_xfb_overflow_streams(results, MAX_VERTEX_STREAMS); - break; - - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - query->Base.Result = (results[1] - results[0]); - /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround: - * "Invocation counter is 4 times actual. WA: SW to divide HW reported - * PS Invocations value by 4." - * - * Prior to Haswell, invocation count was counted by the WM, and it - * buggily counted invocations in units of subspans (2x2 unit). To get the - * correct value, the CS multiplied this by 4. With HSW the logic moved - * and correctly emitted the number of pixel shader invocations, but - * whoever moved it forgot to undo the multiply by 4. - */ - if (devinfo->ver == 8 || devinfo->verx10 == 75) - query->Base.Result /= 4; - break; - - default: - unreachable("Unrecognized query target in brw_queryobj_get_results()"); - } - brw_bo_unmap(query->bo); - - /* Now that we've processed the data stored in the query's buffer object, - * we can release it. - */ - brw_bo_unreference(query->bo); - query->bo = NULL; - - query->Base.Ready = true; -} - -/** - * Driver hook for glBeginQuery(). - * - * Initializes driver structures and emits any GPU commands required to begin - * recording data for the query. - */ -static void -gfx6_begin_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* Since we're starting a new query, we need to throw away old results. */ - brw_bo_unreference(query->bo); - query->bo = - brw_bo_alloc(brw->bufmgr, "query results", 4096, BRW_MEMZONE_OTHER); - - /* For ARB_query_buffer_object: The result is not available */ - set_query_availability(brw, query, false); - - switch (query->Base.Target) { - case GL_TIME_ELAPSED: - /* For timestamp queries, we record the starting time right away so that - * we measure the full time between BeginQuery and EndQuery. There's - * some debate about whether this is the right thing to do. Our decision - * is based on the following text from the ARB_timer_query extension: - * - * "(5) Should the extension measure total time elapsed between the full - * completion of the BeginQuery and EndQuery commands, or just time - * spent in the graphics library? - * - * RESOLVED: This extension will measure the total time elapsed - * between the full completion of these commands. Future extensions - * may implement a query to determine time elapsed at different stages - * of the graphics pipeline." - * - * We write a starting timestamp now (at index 0).
At EndQuery() time, - * we'll write a second timestamp (at index 1), and subtract the two to - * obtain the time elapsed. Notably, this includes time elapsed while - * the system was doing other work, such as running other applications. - */ - brw_write_timestamp(brw, query->bo, 0); - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - case GL_SAMPLES_PASSED_ARB: - brw_write_depth_count(brw, query->bo, 0); - break; - - case GL_PRIMITIVES_GENERATED: - write_primitives_generated(brw, query->bo, query->Base.Stream, 0); - if (query->Base.Stream == 0) - ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD; - break; - - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0); - break; - - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 0); - break; - - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 0); - break; - - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0); - break; - - default: - unreachable("Unrecognized query target in brw_begin_query()"); - } -} - -/** - * Driver hook for glEndQuery(). - * - * Emits GPU commands to record a final query value, ending any data capturing. - * However, the final result isn't necessarily available until the GPU processes - * those commands. brw_queryobj_get_results() processes the captured data to - * produce the final result. 
- */ -static void -gfx6_end_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - switch (query->Base.Target) { - case GL_TIME_ELAPSED: - brw_write_timestamp(brw, query->bo, 1); - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - case GL_SAMPLES_PASSED_ARB: - brw_write_depth_count(brw, query->bo, 1); - break; - - case GL_PRIMITIVES_GENERATED: - write_primitives_generated(brw, query->bo, query->Base.Stream, 1); - if (query->Base.Stream == 0) - ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD; - break; - - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1); - break; - - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 1); - break; - - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 1); - break; - - /* calculate overflow here */ - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - emit_pipeline_stat(brw, query->bo, - query->Base.Stream, query->Base.Target, 1); - break; - - default: - unreachable("Unrecognized query target in brw_end_query()"); - } - - /* The current batch contains the commands to handle EndQuery(), - * but they won't actually execute until it is flushed. - */ - query->flushed = false; - - /* For ARB_query_buffer_object: The result is now available */ - set_query_availability(brw, query, true); -} - -/** - * Flush the batch if it still references the query object BO. - */ -static void -flush_batch_if_needed(struct brw_context *brw, struct brw_query_object *query) -{ - /* If the batch doesn't reference the BO, it must have been flushed - * (for example, due to being full). Record that it's been flushed. - */ - query->flushed = query->flushed || - !brw_batch_references(&brw->batch, query->bo); - - if (!query->flushed) - brw_batch_flush(brw); -} - -/** - * The WaitQuery() driver hook. - * - * Wait for a query result to become available and return it. This is the - * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname. - */ -static void gfx6_wait_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* If the application has requested the query result, but this batch is - * still contributing to it, flush it now to finish that work so the - * result will become available (eventually). - */ - flush_batch_if_needed(brw, query); - - gfx6_queryobj_get_results(ctx, query); -} - -/** - * The CheckQuery() driver hook. - * - * Checks whether a query result is ready yet. If not, flushes. - * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname. 
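flush_batch_if_needed above captures a common pattern: only flush when the still-unsubmitted batch references the query's BO; if it does not, the commands already reached the kernel and flushing would be wasted work. A loose toy model of that decision (single pending BO, not the real batch tracking):

    #include <assert.h>
    #include <stdbool.h>

    struct toy_batch { const void *pending_bo; };  /* BO the batch still references */
    struct toy_query { bool flushed; };

    static bool batch_references(const struct toy_batch *b, const void *bo)
    { return b->pending_bo == bo; }

    static void flush_if_needed(struct toy_batch *b, struct toy_query *q,
                                const void *bo)
    {
       q->flushed = q->flushed || !batch_references(b, bo);
       if (!q->flushed) {
          b->pending_bo = NULL;  /* "submit" the batch */
          q->flushed = true;
       }
    }

    int main(void)
    {
       int bo;                        /* any address serves as a toy BO handle */
       struct toy_batch b = { &bo };
       struct toy_query q = { false };
       flush_if_needed(&b, &q, &bo);  /* batch references bo, so it gets flushed */
       assert(q.flushed && b.pending_bo == NULL);
       return 0;
    }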
- */ -static void gfx6_check_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* If query->bo is NULL, we've already gathered the results - this is a - * redundant CheckQuery call. Ignore it. - */ - if (query->bo == NULL) - return; - - /* From the GL_ARB_occlusion_query spec: - * - * "Instead of allowing for an infinite loop, performing a - * QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is - * not ready yet on the first time it is queried. This ensures that - * the async query will return true in finite time. - */ - flush_batch_if_needed(brw, query); - - if (!brw_bo_busy(query->bo)) { - gfx6_queryobj_get_results(ctx, query); - } -} - -static void -gfx6_query_counter(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - brw_query_counter(ctx, q); - set_query_availability(brw, query, true); -} - -/* Initialize Gfx6+-specific query object functions. */ -void gfx6_init_queryobj_functions(struct dd_function_table *functions) -{ - functions->BeginQuery = gfx6_begin_query; - functions->EndQuery = gfx6_end_query; - functions->CheckQuery = gfx6_check_query; - functions->WaitQuery = gfx6_wait_query; - functions->QueryCounter = gfx6_query_counter; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_sampler_state.c b/src/mesa/drivers/dri/i965/gfx6_sampler_state.c deleted file mode 100644 index 4a5481f..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_sampler_state.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Eric Anholt - * - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_batch.h" - -static void -upload_sampler_state_pointers(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 | - VS_SAMPLER_STATE_CHANGE | - GS_SAMPLER_STATE_CHANGE | - PS_SAMPLER_STATE_CHANGE | - (4 - 2)); - OUT_BATCH(brw->vs.base.sampler_offset); /* VS */ - OUT_BATCH(brw->gs.base.sampler_offset); /* GS */ - OUT_BATCH(brw->wm.base.sampler_offset); - ADVANCE_BATCH(); -} - -const struct brw_tracked_state gfx6_sampler_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = upload_sampler_state_pointers, -}; diff --git a/src/mesa/drivers/dri/i965/gfx6_sol.c b/src/mesa/drivers/dri/i965/gfx6_sol.c deleted file mode 100644 index 56470da..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_sol.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** \file gfx6_sol.c - * - * Code to initialize the binding table entries used by transform feedback. 
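The (4 - 2) in the packet header above follows the usual command convention: the DWord Length field excludes the header's first two dwords, so a 4-dword packet encodes a length of 2. Illustrated with a made-up opcode value rather than the real _3DSTATE_SAMPLER_STATE_POINTERS bits:

    #include <assert.h>
    #include <stdint.h>

    /* Encode a command header: opcode in the high bits, (total dwords - 2)
     * in the low bits. Field positions here are illustrative only. */
    static uint32_t cmd_header(uint32_t opcode, uint32_t total_dwords)
    {
       return (opcode << 16) | (total_dwords - 2);
    }

    int main(void)
    {
       /* A 4-dword packet, like the sampler-pointers packet above. */
       assert((cmd_header(0x0201, 4) & 0xff) == 2);
       return 0;
    }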
- */ - -#include "main/bufferobj.h" -#include "main/macros.h" -#include "brw_context.h" -#include "brw_batch.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "main/transformfeedback.h" -#include "util/u_memory.h" - -static void -gfx6_update_sol_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx); - struct gl_transform_feedback_object *xfb_obj; - const struct gl_transform_feedback_info *linked_xfb_info = NULL; - - if (xfb_active) { - /* BRW_NEW_TRANSFORM_FEEDBACK */ - xfb_obj = ctx->TransformFeedback.CurrentObject; - linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback; - } - - for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { - const int surf_index = BRW_GFX6_SOL_BINDING_START + i; - if (xfb_active && i < linked_xfb_info->NumOutputs) { - unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer; - unsigned buffer_offset = - xfb_obj->Offset[buffer] / 4 + - linked_xfb_info->Outputs[i].DstOffset; - if (brw->programs[MESA_SHADER_GEOMETRY]) { - brw_update_sol_surface( - brw, xfb_obj->Buffers[buffer], - &brw->gs.base.surf_offset[surf_index], - linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->Buffers[buffer].Stride, buffer_offset); - } else { - brw_update_sol_surface( - brw, xfb_obj->Buffers[buffer], - &brw->ff_gs.surf_offset[surf_index], - linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->Buffers[buffer].Stride, buffer_offset); - } - } else { - if (!brw->programs[MESA_SHADER_GEOMETRY]) - brw->ff_gs.surf_offset[surf_index] = 0; - else - brw->gs.base.surf_offset[surf_index] = 0; - } - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state gfx6_sol_surface = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TRANSFORM_FEEDBACK, - }, - .emit = gfx6_update_sol_surfaces, -}; - -/** - * Constructs the binding table for the WM surface state, which maps unit - * numbers to surface state objects. - */ -static void -brw_gs_upload_binding_table(struct brw_context *brw) -{ - uint32_t *bind; - struct gl_context *ctx = &brw->ctx; - const struct gl_program *prog; - bool need_binding_table = false; - - /* We have two scenarios here: - * 1) We are using a geometry shader only to implement transform feedback - * for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL). - * In this case, we only need surfaces for transform feedback in the - * GS stage. - * 2) We have a user-provided geometry shader. In this case we may need - * surfaces for transform feedback and/or other stuff, like textures, - * in the GS stage. - */ - - if (!brw->programs[MESA_SHADER_GEOMETRY]) { - /* BRW_NEW_VERTEX_PROGRAM */ - prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - if (prog) { - /* Skip making a binding table if we don't have anything to put in it */ - const struct gl_transform_feedback_info *linked_xfb_info = - prog->sh.LinkedTransformFeedback; - need_binding_table = linked_xfb_info->NumOutputs > 0; - } - if (!need_binding_table) { - if (brw->ff_gs.bind_bo_offset != 0) { - brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; - brw->ff_gs.bind_bo_offset = 0; - } - return; - } - - /* Might want to calculate nr_surfaces first, to avoid taking up so much - * space for the binding table. Anyway, in this case we know that we only - * use BRW_MAX_SOL_BINDINGS surfaces at most. 
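A note on the buffer_offset computation above: xfb_obj->Offset[] is a byte offset from glBindBufferRange, while the SOL surface wants dword units, hence the divide by 4 before adding the output's DstOffset (already in dwords):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       uint32_t byte_offset = 64;  /* buffer binding offset, in bytes */
       uint32_t dst_offset = 2;    /* varying's dword slot within a vertex */
       uint32_t buffer_offset = byte_offset / 4 + dst_offset;  /* dwords */
       assert(buffer_offset == 18);
       return 0;
    }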
-       */
-      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
-                             32, &brw->ff_gs.bind_bo_offset);
-
-      /* BRW_NEW_SURFACES */
-      memcpy(bind, brw->ff_gs.surf_offset,
-             BRW_MAX_SOL_BINDINGS * sizeof(uint32_t));
-   } else {
-      /* BRW_NEW_GEOMETRY_PROGRAM */
-      prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
-      if (prog) {
-         /* Skip making a binding table if we don't have anything to put in it */
-         struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-         const struct gl_transform_feedback_info *linked_xfb_info =
-            prog->sh.LinkedTransformFeedback;
-         need_binding_table = linked_xfb_info->NumOutputs > 0 ||
-            prog_data->binding_table.size_bytes > 0;
-      }
-      if (!need_binding_table) {
-         if (brw->gs.base.bind_bo_offset != 0) {
-            brw->gs.base.bind_bo_offset = 0;
-            brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-         }
-         return;
-      }
-
-      /* Might want to calculate nr_surfaces first, to avoid taking up so much
-       * space for the binding table.
-       */
-      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES,
-                             32, &brw->gs.base.bind_bo_offset);
-
-      /* BRW_NEW_SURFACES */
-      memcpy(bind, brw->gs.base.surf_offset,
-             BRW_MAX_SURFACES * sizeof(uint32_t));
-   }
-
-   brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-}
-
-const struct brw_tracked_state gfx6_gs_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_VERTEX_PROGRAM |
-             BRW_NEW_SURFACES,
-   },
-   .emit = brw_gs_upload_binding_table,
-};
-
-struct gl_transform_feedback_object *
-brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_transform_feedback_object *brw_obj =
-      CALLOC_STRUCT(brw_transform_feedback_object);
-   if (!brw_obj)
-      return NULL;
-
-   _mesa_init_transform_feedback_object(&brw_obj->base, name);
-
-   brw_obj->offset_bo =
-      brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16,
-                   BRW_MEMZONE_OTHER);
-   brw_obj->prim_count_bo =
-      brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384,
-                   BRW_MEMZONE_OTHER);
-
-   return &brw_obj->base;
-}
-
-void
-brw_delete_transform_feedback(struct gl_context *ctx,
-                              struct gl_transform_feedback_object *obj)
-{
-   struct brw_transform_feedback_object *brw_obj =
-      (struct brw_transform_feedback_object *) obj;
-
-   brw_bo_unreference(brw_obj->offset_bo);
-   brw_bo_unreference(brw_obj->prim_count_bo);
-
-   _mesa_delete_transform_feedback_object(ctx, obj);
-}
-
-/**
- * Tally the number of primitives generated so far.
- *
- * The buffer contains a series of pairs:
- * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
- * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
- *
- * For each stream, we subtract the pair of values (end - start) to get the
- * number of primitives generated during one section. We accumulate these
- * values, adding them up to get the total number of primitives generated.
- *
- * Note that we expose one stream pre-Gfx7, so the above is just (start, end).
- */
-static void
-aggregate_transform_feedback_counter(
-   struct brw_context *brw,
-   struct brw_bo *bo,
-   struct brw_transform_feedback_counter *counter)
-{
-   const unsigned streams = brw->ctx.Const.MaxVertexStreams;
-
-   /* If the current batch is still contributing to the number of primitives
-    * generated, flush it now so the results will be present when mapped.
- */ - if (brw_batch_references(&brw->batch, bo)) - brw_batch_flush(brw); - - if (unlikely(brw->perf_debug && brw_bo_busy(bo))) - perf_debug("Stalling for # of transform feedback primitives written.\n"); - - uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ); - prim_counts += counter->bo_start * streams; - - for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) { - for (unsigned s = 0; s < streams; s++) - counter->accum[s] += prim_counts[streams + s] - prim_counts[s]; - - prim_counts += 2 * streams; - } - - brw_bo_unmap(bo); - - /* We've already gathered up the old data; we can safely overwrite it now. */ - counter->bo_start = counter->bo_end = 0; -} - -/** - * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) - * to prim_count_bo. - * - * If prim_count_bo is out of space, gather up the results so far into - * prims_generated[] and allocate a new buffer with enough space. - * - * The number of primitives written is used to compute the number of vertices - * written to a transform feedback stream, which is required to implement - * DrawTransformFeedback(). - */ -void -brw_save_primitives_written_counters(struct brw_context *brw, - struct brw_transform_feedback_object *obj) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - const int streams = ctx->Const.MaxVertexStreams; - - assert(obj->prim_count_bo != NULL); - - /* Check if there's enough space for a new pair of four values. */ - if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= - obj->prim_count_bo->size) { - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, - &obj->previous_counter); - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, - &obj->counter); - } - - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ - if (devinfo->ver >= 7) { - for (int i = 0; i < streams; i++) { - int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t); - brw_store_register_mem64(brw, obj->prim_count_bo, - GFX7_SO_NUM_PRIMS_WRITTEN(i), - offset); - } - } else { - brw_store_register_mem64(brw, obj->prim_count_bo, - GFX6_SO_NUM_PRIMS_WRITTEN, - obj->counter.bo_end * sizeof(uint64_t)); - } - - /* Update where to write data to. */ - obj->counter.bo_end++; -} - -static void -compute_vertices_written_so_far(struct brw_context *brw, - struct brw_transform_feedback_object *obj, - struct brw_transform_feedback_counter *counter, - uint64_t *vertices_written) -{ - const struct gl_context *ctx = &brw->ctx; - unsigned vertices_per_prim = 0; - - switch (obj->primitive_mode) { - case GL_POINTS: - vertices_per_prim = 1; - break; - case GL_LINES: - vertices_per_prim = 2; - break; - case GL_TRIANGLES: - vertices_per_prim = 3; - break; - default: - unreachable("Invalid transform feedback primitive mode."); - } - - /* Get the number of primitives generated. */ - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter); - - for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) { - vertices_written[i] = vertices_per_prim * counter->accum[i]; - } -} - -/** - * Compute the number of vertices written by the last transform feedback - * begin/end block. 
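- *
- * The count is derived from the accumulated primitive counters: for example,
- * with GL_TRIANGLES as the feedback primitive mode, a stream whose counters
- * accumulated 12 generated primitives has written 3 * 12 = 36 vertices.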
- */ -static void -compute_xfb_vertices_written(struct brw_context *brw, - struct brw_transform_feedback_object *obj) -{ - if (obj->vertices_written_valid || !obj->base.EndedAnytime) - return; - - compute_vertices_written_so_far(brw, obj, &obj->previous_counter, - obj->vertices_written); - obj->vertices_written_valid = true; -} - -/** - * GetTransformFeedbackVertexCount() driver hook. - * - * Returns the number of vertices written to a particular stream by the last - * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback(). - */ -GLsizei -brw_get_transform_feedback_vertex_count(struct gl_context *ctx, - struct gl_transform_feedback_object *obj, - GLuint stream) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - assert(obj->EndedAnytime); - assert(stream < ctx->Const.MaxVertexStreams); - - compute_xfb_vertices_written(brw, brw_obj); - return brw_obj->vertices_written[stream]; -} - -void -brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - const struct gl_program *prog; - const struct gl_transform_feedback_info *linked_xfb_info; - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) xfb_obj; - - assert(brw->screen->devinfo.ver == 6); - - if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) { - /* BRW_NEW_GEOMETRY_PROGRAM */ - prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; - } else { - /* BRW_NEW_VERTEX_PROGRAM */ - prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - } - linked_xfb_info = prog->sh.LinkedTransformFeedback; - - /* Compute the maximum number of vertices that we can write without - * overflowing any of the buffers currently being used for feedback. - */ - brw_obj->max_index - = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj, - linked_xfb_info); - - /* Initialize the SVBI 0 register to zero and set the maximum index. */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(0); /* SVBI 0 */ - OUT_BATCH(0); /* starting index */ - OUT_BATCH(brw_obj->max_index); - ADVANCE_BATCH(); - - /* Initialize the rest of the unused streams to sane values. Otherwise, - * they may indicate that there is no room to write data and prevent - * anything from happening at all. - */ - for (int i = 1; i < 4; i++) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(i << SVB_INDEX_SHIFT); - OUT_BATCH(0); /* starting index */ - OUT_BATCH(0xffffffff); - ADVANCE_BATCH(); - } - - /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - brw_save_primitives_written_counters(brw, brw_obj); - - brw_obj->primitive_mode = mode; -} - -void -brw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ - if (!obj->Paused) - brw_save_primitives_written_counters(brw, brw_obj); - - /* We've reached the end of a transform feedback begin/end block. 
This - * means that future DrawTransformFeedback() calls will need to pick up the - * results of the current counter, and that it's time to roll back the - * current primitive counter to zero. - */ - brw_obj->previous_counter = brw_obj->counter; - brw_reset_transform_feedback_counter(&brw_obj->counter); - - /* EndTransformFeedback() means that we need to update the number of - * vertices written. Since it's only necessary if DrawTransformFeedback() - * is called and it means mapping a buffer object, we delay computing it - * until it's absolutely necessary to try and avoid stalls. - */ - brw_obj->vertices_written_valid = false; -} - -void -brw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. - * While this operation is paused, other transform feedback actions may - * occur, which will contribute to the counters. We need to exclude that - * from our counts. - */ - brw_save_primitives_written_counters(brw, brw_obj); -} - -void -brw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Reload SVBI 0 with the count of vertices written so far. */ - uint64_t svbi; - compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi); - - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(0); /* SVBI 0 */ - OUT_BATCH((uint32_t) svbi); /* starting index */ - OUT_BATCH(brw_obj->max_index); - ADVANCE_BATCH(); - - /* Initialize the rest of the unused streams to sane values. Otherwise, - * they may indicate that there is no room to write data and prevent - * anything from happening at all. - */ - for (int i = 1; i < 4; i++) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(i << SVB_INDEX_SHIFT); - OUT_BATCH(0); /* starting index */ - OUT_BATCH(0xffffffff); - ADVANCE_BATCH(); - } - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - brw_save_primitives_written_counters(brw, brw_obj); -} diff --git a/src/mesa/drivers/dri/i965/gfx6_urb.c b/src/mesa/drivers/dri/i965/gfx6_urb.c deleted file mode 100644 index 8b69409..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_urb.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include "main/macros.h" -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -/** - * When the GS is not in use, we assign the entire URB space to the VS. When - * the GS is in use, we split the URB space evenly between the VS and the GS. - * This is not ideal, but it's simple. - * - * URB size / 2 URB size / 2 - * _____________-______________ _____________-______________ - * / \ / \ - * +-------------------------------------------------------------+ - * | Vertex Shader Entries | Geometry Shader Entries | - * +-------------------------------------------------------------+ - * - * Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB. - * (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.) - */ -void -gfx6_upload_urb(struct brw_context *brw, unsigned vs_size, - bool gs_present, unsigned gs_size) -{ - int nr_vs_entries, nr_gs_entries; - int total_urb_size = brw->urb.size * 1024; /* in bytes */ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Calculate how many entries fit in each stage's section of the URB */ - if (gs_present) { - nr_vs_entries = (total_urb_size/2) / (vs_size * 128); - nr_gs_entries = (total_urb_size/2) / (gs_size * 128); - } else { - nr_vs_entries = total_urb_size / (vs_size * 128); - nr_gs_entries = 0; - } - - /* Then clamp to the maximum allowed by the hardware */ - if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX]) - nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX]; - - if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]) - nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]; - - /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */ - brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); - brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4); - - assert(brw->urb.nr_vs_entries >= - devinfo->urb.min_entries[MESA_SHADER_VERTEX]); - assert(brw->urb.nr_vs_entries % 4 == 0); - assert(brw->urb.nr_gs_entries % 4 == 0); - assert(vs_size <= 5); - assert(gs_size <= 5); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); - OUT_BATCH(((vs_size - 1) << GFX6_URB_VS_SIZE_SHIFT) | - ((brw->urb.nr_vs_entries) << GFX6_URB_VS_ENTRIES_SHIFT)); - OUT_BATCH(((gs_size - 1) << GFX6_URB_GS_SIZE_SHIFT) | - ((brw->urb.nr_gs_entries) << GFX6_URB_GS_ENTRIES_SHIFT)); - ADVANCE_BATCH(); - - /* From the PRM Volume 2 part 1, section 1.4.7: - * - * Because of a urb corruption caused by allocating a previous gsunit’s - * urb entry to vsunit software is required to send a "GS NULL - * Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus - * a dummy DRAW call before any case where VS will be taking over GS URB - * space. - * - * It is not clear exactly what this means ("URB fence" is a command that - * doesn't exist on Gfx6). So for now we just do a full pipeline flush as - * a workaround. 
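-    * The dangerous transition is precisely a draw with the GS enabled
-    * followed by one with it disabled, which hands the GS half of the URB
-    * back to the VS; hence the gs_present tracking below.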
-    */
-   if (brw->urb.gs_present && !gs_present)
-      brw_emit_mi_flush(brw);
-   brw->urb.gs_present = gs_present;
-}
-
-static void
-upload_urb(struct brw_context *brw)
-{
-   /* BRW_NEW_VS_PROG_DATA */
-   const struct brw_vue_prog_data *vs_vue_prog_data =
-      brw_vue_prog_data(brw->vs.base.prog_data);
-   const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1);
-
-   /* BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_GS_PROG_DATA */
-   const bool gs_present =
-      brw->ff_gs.prog_active || brw->programs[MESA_SHADER_GEOMETRY];
-
-   /* When using GS to do transform feedback only, we use the same VUE layout
-    * for VS outputs and GS outputs (as it's what the SF and Clipper expect),
-    * so we can simply make the GS URB entry size the same as for the VS.
-    * This may technically be too large in cases where we have few vertex
-    * attributes and a lot of varyings, since the VS size is determined by the
-    * larger of the two. For now, it's safe.
-    *
-    * For a user-provided GS the assumption above does not hold, since the GS
-    * outputs can be different from the VS outputs.
-    */
-   unsigned gs_size = vs_size;
-   if (brw->programs[MESA_SHADER_GEOMETRY]) {
-      const struct brw_vue_prog_data *gs_vue_prog_data =
-         brw_vue_prog_data(brw->gs.base.prog_data);
-      gs_size = gs_vue_prog_data->urb_entry_size;
-      assert(gs_size >= 1);
-   }
-
-   gfx6_upload_urb(brw, vs_size, gs_present, gs_size);
-}
-
-const struct brw_tracked_state gfx6_urb = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_FF_GS_PROG_DATA |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = upload_urb,
-};
diff --git a/src/mesa/drivers/dri/i965/gfx7_l3_state.c b/src/mesa/drivers/dri/i965/gfx7_l3_state.c
deleted file mode 100644
index c088dc6..0000000
--- a/src/mesa/drivers/dri/i965/gfx7_l3_state.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "common/intel_l3_config.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/**
- * Calculate the desired L3 partitioning based on the current state of the
- * pipeline. For now this simply returns the conservative defaults calculated
- * by get_default_l3_weights(), but we could probably do better by gathering
- * more statistics from the pipeline state (e.g.
guess of expected URB usage - * and bound surfaces), or by using feed-back from performance counters. - */ -static struct intel_l3_weights -get_pipeline_state_l3_weights(const struct brw_context *brw) -{ - const struct brw_stage_state *stage_states[] = { - [MESA_SHADER_VERTEX] = &brw->vs.base, - [MESA_SHADER_TESS_CTRL] = &brw->tcs.base, - [MESA_SHADER_TESS_EVAL] = &brw->tes.base, - [MESA_SHADER_GEOMETRY] = &brw->gs.base, - [MESA_SHADER_FRAGMENT] = &brw->wm.base, - [MESA_SHADER_COMPUTE] = &brw->cs.base - }; - bool needs_dc = false, needs_slm = false; - - for (unsigned i = 0; i < ARRAY_SIZE(stage_states); i++) { - const struct gl_program *prog = - brw->ctx._Shader->CurrentProgram[stage_states[i]->stage]; - const struct brw_stage_prog_data *prog_data = stage_states[i]->prog_data; - - needs_dc |= (prog && (prog->sh.data->NumAtomicBuffers || - prog->sh.data->NumShaderStorageBlocks || - prog->info.num_images)) || - (prog_data && prog_data->total_scratch); - needs_slm |= prog_data && prog_data->total_shared; - } - - return intel_get_default_l3_weights(&brw->screen->devinfo, - needs_dc, needs_slm); -} - -/** - * Program the hardware to use the specified L3 configuration. - */ -static void -setup_l3_config(struct brw_context *brw, const struct intel_l3_config *cfg) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL]; - const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_slm = cfg->n[INTEL_L3P_SLM]; - - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline... - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_CS_STALL); - - /* ...followed by a second pipelined PIPE_CONTROL that initiates - * invalidation of the relevant caches. Note that because RO invalidation - * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL - * command is processed by the CS) we cannot combine it with the previous - * stalling flush as the hardware documentation suggests, because that - * would cause the CS to stall on previous rendering *after* RO - * invalidation and wouldn't prevent the RO caches from being polluted by - * concurrent rendering before the stall completes. This intentionally - * doesn't implement the SKL+ hardware workaround suggesting to enable CS - * stall on PIPE_CONTROLs with the texture cache invalidation bit set for - * GPGPU workloads because the previous and subsequent PIPE_CONTROLs - * already guarantee that there is no concurrent GPGPU kernel execution - * (see SKL HSD 2132585). - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE); - - /* Now send a third stalling flush to make sure that invalidation is - * complete when the L3 configuration registers are modified. 
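-    * The full sequence is therefore: (1) a stalling flush of the dirty R/W
-    * caches, (2) a pipelined invalidation of the RO caches, and (3) a
-    * stalling flush to fence the invalidation against the register writes
-    * below.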
- */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_CS_STALL); - - if (devinfo->ver >= 8) { - assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]); - - const unsigned imm_data = ( - (devinfo->ver < 11 && has_slm ? GFX8_L3CNTLREG_SLM_ENABLE : 0) | - (devinfo->ver == 11 ? GFX11_L3CNTLREG_USE_FULL_WAYS : 0) | - SET_FIELD(cfg->n[INTEL_L3P_URB], GFX8_L3CNTLREG_URB_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_RO], GFX8_L3CNTLREG_RO_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_DC], GFX8_L3CNTLREG_DC_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_ALL], GFX8_L3CNTLREG_ALL_ALLOC)); - - /* Set up the L3 partitioning. */ - brw_load_register_imm32(brw, GFX8_L3CNTLREG, imm_data); - } else { - assert(!cfg->n[INTEL_L3P_ALL]); - - /* When enabled SLM only uses a portion of the L3 on half of the banks, - * the matching space on the remaining banks has to be allocated to a - * client (URB for all validated configurations) set to the - * lower-bandwidth 2-bank address hashing mode. - */ - const bool urb_low_bw = has_slm && devinfo->platform != INTEL_PLATFORM_BYT; - assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]); - - /* Minimum number of ways that can be allocated to the URB. */ - const unsigned n0_urb = (devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0); - assert(cfg->n[INTEL_L3P_URB] >= n0_urb); - - BEGIN_BATCH(7); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); - - /* Demote any clients with no ways assigned to LLC. */ - OUT_BATCH(GFX7_L3SQCREG1); - OUT_BATCH((devinfo->platform == INTEL_PLATFORM_HSW ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : - devinfo->platform == INTEL_PLATFORM_BYT ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : - IVB_L3SQCREG1_SQGHPCI_DEFAULT) | - (has_dc ? 0 : GFX7_L3SQCREG1_CONV_DC_UC) | - (has_is ? 0 : GFX7_L3SQCREG1_CONV_IS_UC) | - (has_c ? 0 : GFX7_L3SQCREG1_CONV_C_UC) | - (has_t ? 0 : GFX7_L3SQCREG1_CONV_T_UC)); - - /* Set up the L3 partitioning. */ - OUT_BATCH(GFX7_L3CNTLREG2); - OUT_BATCH((has_slm ? GFX7_L3CNTLREG2_SLM_ENABLE : 0) | - SET_FIELD(cfg->n[INTEL_L3P_URB] - n0_urb, GFX7_L3CNTLREG2_URB_ALLOC) | - (urb_low_bw ? GFX7_L3CNTLREG2_URB_LOW_BW : 0) | - SET_FIELD(cfg->n[INTEL_L3P_ALL], GFX7_L3CNTLREG2_ALL_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_RO], GFX7_L3CNTLREG2_RO_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_DC], GFX7_L3CNTLREG2_DC_ALLOC)); - OUT_BATCH(GFX7_L3CNTLREG3); - OUT_BATCH(SET_FIELD(cfg->n[INTEL_L3P_IS], GFX7_L3CNTLREG3_IS_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_C], GFX7_L3CNTLREG3_C_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_T], GFX7_L3CNTLREG3_T_ALLOC)); - - ADVANCE_BATCH(); - - if (can_do_hsw_l3_atomics(brw->screen)) { - /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep - * them disabled to avoid crashing the system hard. - */ - BEGIN_BATCH(5); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2)); - OUT_BATCH(HSW_SCRATCH1); - OUT_BATCH(has_dc ? 0 : HSW_SCRATCH1_L3_ATOMIC_DISABLE); - OUT_BATCH(HSW_ROW_CHICKEN3); - OUT_BATCH(REG_MASK(HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE) | - (has_dc ? 0 : HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE)); - ADVANCE_BATCH(); - } - } -} - -/** - * Update the URB size in the context state for the specified L3 - * configuration. 
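- *
- * The new size feeds the entry-count math in gfx7_upload_urb(), so this
- * also zeroes the cached per-stage entry sizes to force the 3DSTATE_URB_*
- * packets to be re-emitted.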
- */
-static void
-update_urb_size(struct brw_context *brw, const struct intel_l3_config *cfg)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   const unsigned sz = intel_get_l3_config_urb_size(devinfo, cfg);
-
-   if (brw->urb.size != sz) {
-      brw->urb.size = sz;
-      brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
-
-      /* If we change the total URB size, reset the individual stage sizes to
-       * zero so that, even if the per-stage entry sizes are unchanged,
-       * gfx7_upload_urb still re-emits 3DSTATE_URB_*.
-       */
-      brw->urb.vsize = 0;
-      brw->urb.gsize = 0;
-      brw->urb.hsize = 0;
-      brw->urb.dsize = 0;
-   }
-}
-
-void
-brw_emit_l3_state(struct brw_context *brw)
-{
-   const struct intel_l3_weights w = get_pipeline_state_l3_weights(brw);
-   const float dw = intel_diff_l3_weights(w, intel_get_l3_config_weights(brw->l3.config));
-   /* The distance between any two compatible weight vectors cannot exceed two
-    * due to the triangle inequality.
-    */
-   const float large_dw_threshold = 2.0;
-   /* Somewhat arbitrary, simply makes sure that there will be no repeated
-    * transitions to the same L3 configuration, could probably do better here.
-    */
-   const float small_dw_threshold = 0.5;
-   /* If we're emitting a new batch the caches should already be clean and the
-    * transition should be relatively cheap, so it shouldn't hurt much to use
-    * the smaller threshold. Otherwise use the larger threshold so that we
-    * only reprogram the L3 mid-batch if the most recently programmed
-    * configuration is incompatible with the current pipeline state.
-    */
-   const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
-                               small_dw_threshold : large_dw_threshold);
-
-   if (dw > dw_threshold && can_do_pipelined_register_writes(brw->screen)) {
-      const struct intel_l3_config *const cfg =
-         intel_get_l3_config(&brw->screen->devinfo, w);
-
-      setup_l3_config(brw, cfg);
-      update_urb_size(brw, cfg);
-      brw->l3.config = cfg;
-
-      if (INTEL_DEBUG(DEBUG_L3)) {
-         fprintf(stderr, "L3 config transition (%f > %f): ", dw, dw_threshold);
-         intel_dump_l3_config(cfg, stderr);
-      }
-   }
-}
-
-const struct brw_tracked_state gfx7_l3_state = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_CS_PROG_DATA |
-             BRW_NEW_FS_PROG_DATA |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_TCS_PROG_DATA |
-             BRW_NEW_TES_PROG_DATA |
-             BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = brw_emit_l3_state
-};
-
-/**
- * Hack to restore the default L3 configuration.
- *
- * This will be called at the end of every batch in order to reset the L3
- * configuration to the default values for the time being until the kernel is
- * fixed. Until kernel commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b
- * (included in v4.1) we would set the MI_RESTORE_INHIBIT bit when submitting
- * batch buffers for the default context used by the DDX, which meant that any
- * context state changed by the GL would leak into the DDX, the assumption
- * being that the DDX would initialize any state it cares about manually. The
- * DDX is however not careful enough to program an L3 configuration
- * explicitly, and it makes assumptions about it (URB size) which won't hold
- * and cause it to misrender if we let our L3 set-up leak into the DDX.
- *
- * Since v4.1 of the Linux kernel the default context is saved and restored
- * normally, so it's far less likely for our L3 programming to interfere with
- * other contexts -- In fact restoring the default L3 configuration at the end
- * of the batch will be redundant most of the time.
A kind of state leak is - * still possible though if the context making assumptions about L3 state is - * created immediately after our context was active (e.g. without the DDX - * default context being scheduled in between) because at present the DRM - * doesn't fully initialize the contents of newly created contexts and instead - * sets the MI_RESTORE_INHIBIT flag causing it to inherit the state from the - * last active context. - * - * It's possible to realize such a scenario if, say, an X server (or a GL - * application using an outdated non-L3-aware Mesa version) is started while - * another GL application is running and happens to have modified the L3 - * configuration, or if no X server is running at all and a GL application - * using a non-L3-aware Mesa version is started after another GL application - * ran and modified the L3 configuration -- The latter situation can actually - * be reproduced easily on IVB in our CI system. - */ -void -gfx7_restore_default_l3_config(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct intel_l3_config *const cfg = intel_get_default_l3_config(devinfo); - - if (cfg != brw->l3.config && - can_do_pipelined_register_writes(brw->screen)) { - setup_l3_config(brw, cfg); - update_urb_size(brw, cfg); - brw->l3.config = cfg; - } -} diff --git a/src/mesa/drivers/dri/i965/gfx7_sol_state.c b/src/mesa/drivers/dri/i965/gfx7_sol_state.c deleted file mode 100644 index d1a2e5b..0000000 --- a/src/mesa/drivers/dri/i965/gfx7_sol_state.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file gfx7_sol_state.c - * - * Controls the stream output logic (SOL) stage of the gfx7 hardware, which is - * used to implement GL_EXT_transform_feedback. - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "main/transformfeedback.h" - -void -gfx7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - assert(brw->screen->devinfo.ver == 7); - - /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. 
*/ - brw_save_primitives_written_counters(brw, brw_obj); - - /* Reset the SO buffer offsets to 0. */ - if (!can_do_pipelined_register_writes(brw->screen)) { - brw_batch_flush(brw); - brw->batch.needs_sol_reset = true; - } else { - for (int i = 0; i < 4; i++) { - brw_load_register_imm32(brw, GFX7_SO_WRITE_OFFSET(i), 0); - } - } - - brw_obj->primitive_mode = mode; -} - -void -gfx7_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - /* After EndTransformFeedback, it's likely that the client program will try - * to draw using the contents of the transform feedback buffer as vertex - * input. In order for this to work, we need to flush the data through at - * least the GS stage of the pipeline, and flush out the render cache. For - * simplicity, just do a full flush. - */ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ - if (!obj->Paused) - brw_save_primitives_written_counters(brw, brw_obj); - - /* We've reached the end of a transform feedback begin/end block. This - * means that future DrawTransformFeedback() calls will need to pick up the - * results of the current counter, and that it's time to roll back the - * current primitive counter to zero. - */ - brw_obj->previous_counter = brw_obj->counter; - brw_reset_transform_feedback_counter(&brw_obj->counter); - - /* EndTransformFeedback() means that we need to update the number of - * vertices written. Since it's only necessary if DrawTransformFeedback() - * is called and it means mapping a buffer object, we delay computing it - * until it's absolutely necessary to try and avoid stalls. - */ - brw_obj->vertices_written_valid = false; -} - -void -gfx7_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - assert(brw->screen->devinfo.ver == 7); - - /* Save the SOL buffer offset register values. */ - for (int i = 0; i < 4; i++) { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - - /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. - * While this operation is paused, other transform feedback actions may - * occur, which will contribute to the counters. We need to exclude that - * from our counts. - */ - brw_save_primitives_written_counters(brw, brw_obj); -} - -void -gfx7_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - assert(brw->screen->devinfo.ver == 7); - - /* Reload the SOL buffer offset registers. */ - for (int i = 0; i < 4; i++) { - BEGIN_BATCH(3); - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. 
*/ - brw_save_primitives_written_counters(brw, brw_obj); -} diff --git a/src/mesa/drivers/dri/i965/gfx7_urb.c b/src/mesa/drivers/dri/i965/gfx7_urb.c deleted file mode 100644 index ff00108..0000000 --- a/src/mesa/drivers/dri/i965/gfx7_urb.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "main/macros.h" -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#include "common/intel_l3_config.h" - -/** - * The following diagram shows how we partition the URB: - * - * 16kB or 32kB Rest of the URB space - * __________-__________ _________________-_________________ - * / \ / \ - * +-------------------------------------------------------------+ - * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB | - * | Constants | Entries | - * +-------------------------------------------------------------+ - * - * Notably, push constants must be stored at the beginning of the URB - * space, while entries can be stored anywhere. Ivybridge and Haswell - * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3 - * doubles this (32kB). - * - * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and - * sized) in increments of 1kB. Haswell GT3 requires them to be located and - * sized in increments of 2kB. - * - * Currently we split the constant buffer space evenly among whatever stages - * are active. This is probably not ideal, but simple. - * - * Ivybridge GT1 and Haswell GT1 have 128kB of URB space. - * Ivybridge GT2 and Haswell GT2 have 256kB of URB space. - * Haswell GT3 has 512kB of URB space. - * - * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations", - * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS. - */ -static void -gfx7_allocate_push_constants(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool gs_present = brw->programs[MESA_SHADER_GEOMETRY]; - - /* BRW_NEW_TESS_PROGRAMS */ - bool tess_present = brw->programs[MESA_SHADER_TESS_EVAL]; - - unsigned avail_size = 16; - unsigned multiplier = devinfo->max_constant_urb_size_kb / 16; - - int stages = 2 + gs_present + 2 * tess_present; - - /* Divide up the available space equally between stages. Because we - * round down (using floor division), there may be some left over - * space. We allocate that to the pixel shader stage. 
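-    * For example, with both tessellation and a GS enabled, stages == 5, so
-    * each stage gets 16 / 5 == 3kB and the FS receives the remaining
-    * 16 - 3 * 4 == 4kB; the Haswell GT3 multiplier of 2 scales these to 6kB
-    * and 8kB.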
- */ - unsigned size_per_stage = avail_size / stages; - - unsigned vs_size = size_per_stage; - unsigned hs_size = tess_present ? size_per_stage : 0; - unsigned ds_size = tess_present ? size_per_stage : 0; - unsigned gs_size = gs_present ? size_per_stage : 0; - unsigned fs_size = avail_size - size_per_stage * (stages - 1); - - gfx7_emit_push_constant_state(brw, multiplier * vs_size, - multiplier * hs_size, multiplier * ds_size, - multiplier * gs_size, multiplier * fs_size); - - /* From p115 of the Ivy Bridge PRM (3.2.1.4 3DSTATE_PUSH_CONSTANT_ALLOC_VS): - * - * Programming Restriction: - * - * The 3DSTATE_CONSTANT_VS must be reprogrammed prior to the next - * 3DPRIMITIVE command after programming the - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS. - * - * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_* - * commands. - */ - brw->vs.base.push_constants_dirty = true; - brw->tcs.base.push_constants_dirty = true; - brw->tes.base.push_constants_dirty = true; - brw->gs.base.push_constants_dirty = true; - brw->wm.base.push_constants_dirty = true; -} - -void -gfx7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, - unsigned hs_size, unsigned ds_size, - unsigned gs_size, unsigned fs_size) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - unsigned offset = 0; - - /* From the SKL PRM, Workarounds section (#878): - * - * Push constant buffer corruption possible. WA: Insert 2 zero-length - * PushConst_PS before every intended PushConst_PS update, issue a - * NULLPRIM after each of the zero len PC update to make sure CS commits - * them. - * - * This workaround is attempting to solve a pixel shader push constant - * synchronization issue. - * - * There's an unpublished WA that involves re-emitting - * 3DSTATE_PUSH_CONSTANT_ALLOC_PS for every 500-ish 3DSTATE_CONSTANT_PS - * packets. Since our counting methods may not be reliable due to - * context-switching and pre-emption, we instead choose to approximate this - * behavior by re-emitting the packet at the top of the batch. - */ - if (brw->ctx.NewDriverState == BRW_NEW_BATCH) { - /* SKL GT2 and GLK 2x6 have reliably demonstrated this issue thus far. - * We've also seen some intermittent failures from SKL GT4 and BXT in - * the past. - */ - if (devinfo->platform != INTEL_PLATFORM_SKL && - devinfo->platform != INTEL_PLATFORM_BXT && - devinfo->platform != INTEL_PLATFORM_GLK) - return; - } - - BEGIN_BATCH(10); - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); - OUT_BATCH(vs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += vs_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_HS << 16 | (2 - 2)); - OUT_BATCH(hs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += hs_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_DS << 16 | (2 - 2)); - OUT_BATCH(ds_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += ds_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2)); - OUT_BATCH(gs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += gs_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2)); - OUT_BATCH(fs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - ADVANCE_BATCH(); - - /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS): - * - * A PIPE_CONTROL command with the CS Stall bit set must be programmed - * in the ring after this instruction. - * - * No such restriction exists for Haswell or Baytrail. 
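-    * That is why the workaround below is applied only on Ivybridge proper
-    * (verx10 <= 70 and not Baytrail).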
- */ - if (devinfo->verx10 <= 70 && devinfo->platform != INTEL_PLATFORM_BYT) - gfx7_emit_cs_stall_flush(brw); -} - -const struct brw_tracked_state gfx7_push_constant_space = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT | - BRW_NEW_BATCH | /* Push constant workaround */ - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = gfx7_allocate_push_constants, -}; - -static void -upload_urb(struct brw_context *brw) -{ - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vue_prog_data *vs_vue_prog_data = - brw_vue_prog_data(brw->vs.base.prog_data); - const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1); - /* BRW_NEW_GS_PROG_DATA */ - const bool gs_present = brw->gs.base.prog_data; - /* BRW_NEW_TES_PROG_DATA */ - const bool tess_present = brw->tes.base.prog_data; - - gfx7_upload_urb(brw, vs_size, gs_present, tess_present); -} - -void -gfx7_upload_urb(struct brw_context *brw, unsigned vs_size, - bool gs_present, bool tess_present) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_{VS,TCS,TES,GS}_PROG_DATA */ - struct brw_vue_prog_data *prog_data[4] = { - [MESA_SHADER_VERTEX] = - brw_vue_prog_data(brw->vs.base.prog_data), - [MESA_SHADER_TESS_CTRL] = - tess_present ? brw_vue_prog_data(brw->tcs.base.prog_data) : NULL, - [MESA_SHADER_TESS_EVAL] = - tess_present ? brw_vue_prog_data(brw->tes.base.prog_data) : NULL, - [MESA_SHADER_GEOMETRY] = - gs_present ? brw_vue_prog_data(brw->gs.base.prog_data) : NULL, - }; - - unsigned entry_size[4]; - entry_size[MESA_SHADER_VERTEX] = vs_size; - for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) { - entry_size[i] = prog_data[i] ? prog_data[i]->urb_entry_size : 1; - } - - /* If we're just switching between programs with the same URB requirements, - * skip the rest of the logic. 
-    */
-   if (brw->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
-       brw->urb.gs_present == gs_present &&
-       brw->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
-       brw->urb.tess_present == tess_present &&
-       brw->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
-       brw->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
-      return;
-   }
-   brw->urb.vsize = entry_size[MESA_SHADER_VERTEX];
-   brw->urb.gs_present = gs_present;
-   brw->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
-   brw->urb.tess_present = tess_present;
-   brw->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
-   brw->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
-
-   unsigned entries[4];
-   unsigned start[4];
-   bool constrained;
-   intel_get_urb_config(devinfo, brw->l3.config,
-                        tess_present, gs_present, entry_size,
-                        entries, start, NULL, &constrained);
-
-   if (devinfo->platform == INTEL_PLATFORM_IVB)
-      gfx7_emit_vs_workaround_flush(brw);
-
-   BEGIN_BATCH(8);
-   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-      assert(devinfo->ver != 10 || entry_size[i] % 3);
-      OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
-      OUT_BATCH(entries[i] |
-                ((entry_size[i] - 1) << GFX7_URB_ENTRY_SIZE_SHIFT) |
-                (start[i] << GFX7_URB_STARTING_ADDRESS_SHIFT));
-   }
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gfx7_urb = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_URB_SIZE |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_TCS_PROG_DATA |
-             BRW_NEW_TES_PROG_DATA |
-             BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = upload_urb,
-};
diff --git a/src/mesa/drivers/dri/i965/gfx8_depth_state.c b/src/mesa/drivers/dri/i965/gfx8_depth_state.c
deleted file mode 100644
index e041355..0000000
--- a/src/mesa/drivers/dri/i965/gfx8_depth_state.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-#include "brw_mipmap_tree.h"
-#include "brw_fbo.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-#include "brw_wm.h"
-#include "main/framebuffer.h"
-
-/**
- * Should we set the PMA FIX ENABLE bit?
- *
- * To avoid unnecessary depth-related stalls, we need to set this bit.
- * However, there is a very complicated formula which governs when it
- * is legal to do so. This function computes that.
- *
- * See the documentation for the CACHE_MODE_1 register, bit 11.
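- *
- * Roughly: the fix may only be enabled while HiZ and depth testing are on,
- * and the pixel shader either computes depth itself or can kill pixels
- * while depth or stencil writes are enabled. The term-by-term breakdown
- * below follows that formula.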
- */ -static bool -pma_fix_enable(const struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - /* _NEW_BUFFERS */ - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - - /* 3DSTATE_WM::ForceThreadDispatch is never used. */ - const bool wm_force_thread_dispatch = false; - - /* 3DSTATE_RASTER::ForceSampleCount is never used. */ - const bool raster_force_sample_count_nonzero = false; - - /* _NEW_BUFFERS: - * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && - * 3DSTATE_DEPTH_BUFFER::HIZ Enable - */ - const bool hiz_enabled = depth_irb && brw_renderbuffer_has_hiz(depth_irb); - - /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). */ - const bool edsc_not_preps = !wm_prog_data->early_fragment_tests; - - /* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */ - const bool pixel_shader_valid = true; - - /* !(3DSTATE_WM_HZ_OP::DepthBufferClear || - * 3DSTATE_WM_HZ_OP::DepthBufferResolve || - * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || - * 3DSTATE_WM_HZ_OP::StencilBufferClear) - * - * HiZ operations are done outside of the normal state upload, so they're - * definitely not happening now. - */ - const bool in_hiz_op = false; - - /* _NEW_DEPTH: - * DEPTH_STENCIL_STATE::DepthTestEnable - */ - const bool depth_test_enabled = depth_irb && ctx->Depth.Test; - - /* _NEW_DEPTH: - * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && - * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE. - */ - const bool depth_writes_enabled = brw_depth_writes_enabled(brw); - - /* _NEW_STENCIL: - * !DEPTH_STENCIL_STATE::Stencil Buffer Write Enable || - * !3DSTATE_DEPTH_BUFFER::Stencil Buffer Enable || - * !3DSTATE_STENCIL_BUFFER::Stencil Buffer Enable - */ - const bool stencil_writes_enabled = brw->stencil_write_enabled; - - /* 3DSTATE_PS_EXTRA::Pixel Shader Computed Depth Mode != PSCDEPTH_OFF */ - const bool ps_computes_depth = - wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; - - /* BRW_NEW_FS_PROG_DATA: 3DSTATE_PS_EXTRA::PixelShaderKillsPixels - * BRW_NEW_FS_PROG_DATA: 3DSTATE_PS_EXTRA::oMask Present to RenderTarget - * _NEW_MULTISAMPLE: 3DSTATE_PS_BLEND::AlphaToCoverageEnable - * _NEW_COLOR: 3DSTATE_PS_BLEND::AlphaTestEnable - * _NEW_BUFFERS: 3DSTATE_PS_BLEND::AlphaTestEnable - * 3DSTATE_PS_BLEND::AlphaToCoverageEnable - * - * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false. - * 3DSTATE_WM::ForceKillPix != ForceOff is always true. - */ - const bool kill_pixel = - wm_prog_data->uses_kill || - wm_prog_data->uses_omask || - _mesa_is_alpha_test_enabled(ctx) || - _mesa_is_alpha_to_coverage_enabled(ctx); - - /* The big formula in CACHE_MODE_1::NP PMA FIX ENABLE. */ - return !wm_force_thread_dispatch && - !raster_force_sample_count_nonzero && - hiz_enabled && - edsc_not_preps && - pixel_shader_valid && - !in_hiz_op && - depth_test_enabled && - (ps_computes_depth || - (kill_pixel && (depth_writes_enabled || stencil_writes_enabled))); -} - -void -gfx8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits) -{ - /* If we haven't actually changed the value, bail now to avoid unnecessary - * pipeline stalls and register writes. - */ - if (brw->pma_stall_bits == pma_stall_bits) - return; - - brw->pma_stall_bits = pma_stall_bits; - - /* According to the PIPE_CONTROL documentation, software should emit a - * PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set prior - * to the LRI. 
If stencil buffer writes are enabled, then a Render Cache - * Flush is also necessary. - */ - const uint32_t render_cache_flush = - brw->stencil_write_enabled ? PIPE_CONTROL_RENDER_TARGET_FLUSH : 0; - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - render_cache_flush); - - /* CACHE_MODE_1 is a non-privileged register. */ - brw_load_register_imm32(brw, GFX7_CACHE_MODE_1, - GFX8_HIZ_PMA_MASK_BITS | - pma_stall_bits ); - - /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache - * Flush bits is often necessary. We do it regardless because it's easier. - * The render cache flush is also necessary if stencil writes are enabled. - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - render_cache_flush); - -} - -static void -gfx8_emit_pma_stall_workaround(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t bits = 0; - - if (devinfo->ver >= 9) - return; - - if (pma_fix_enable(brw)) - bits |= GFX8_HIZ_NP_PMA_FIX_ENABLE | GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE; - - gfx8_write_pma_stall_bits(brw, bits); -} - -const struct brw_tracked_state gfx8_pma_fix = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_DEPTH | - _NEW_MULTISAMPLE | - _NEW_STENCIL, - .brw = BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA, - }, - .emit = gfx8_emit_pma_stall_workaround -}; diff --git a/src/mesa/drivers/dri/i965/gfx8_multisample_state.c b/src/mesa/drivers/dri/i965/gfx8_multisample_state.c deleted file mode 100644 index 5724463..0000000 --- a/src/mesa/drivers/dri/i965/gfx8_multisample_state.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_batch.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_multisample_state.h" - -/** - * 3DSTATE_SAMPLE_PATTERN - */ -void -gfx8_emit_3dstate_sample_pattern(struct brw_context *brw) -{ - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2)); - - /* 16x MSAA */ - OUT_BATCH(brw_multisample_positions_16x[0]); /* positions 3, 2, 1, 0 */ - OUT_BATCH(brw_multisample_positions_16x[1]); /* positions 7, 6, 5, 4 */ - OUT_BATCH(brw_multisample_positions_16x[2]); /* positions 11, 10, 9, 8 */ - OUT_BATCH(brw_multisample_positions_16x[3]); /* positions 15, 14, 13, 12 */ - - /* 8x MSAA */ - OUT_BATCH(brw_multisample_positions_8x[1]); /* sample positions 7654 */ - OUT_BATCH(brw_multisample_positions_8x[0]); /* sample positions 3210 */ - - /* 4x MSAA */ - OUT_BATCH(brw_multisample_positions_4x); - - /* 1x and 2x MSAA */ - OUT_BATCH(brw_multisample_positions_1x_2x); - ADVANCE_BATCH(); -} diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c b/src/mesa/drivers/dri/i965/hsw_queryobj.c deleted file mode 100644 index da195c8..0000000 --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c +++ /dev/null @@ -1,486 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -/** @file hsw_queryobj.c - * - * Support for query buffer objects (GL_ARB_query_buffer_object) on Haswell+. 
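- *
- * Query results are computed on the GPU: the helpers below assemble MI_MATH
- * ALU programs operating on the command streamer GPRs (HSW_CS_GPR(n)), so
- * results can be written into a buffer object without stalling the CPU.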
- */ -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" - -/* - * GPR0 = 80 * GPR0; - */ -static void -mult_gpr0_by_80(struct brw_context *brw) -{ - static const uint32_t maths[] = { - MI_MATH_ALU2(LOAD, SRCA, R0), - MI_MATH_ALU2(LOAD, SRCB, R0), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - /* GPR1 = 16 * GPR0 */ - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R2, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R2), - MI_MATH_ALU2(LOAD, SRCB, R2), - MI_MATH_ALU0(ADD), - /* GPR2 = 64 * GPR0 */ - MI_MATH_ALU2(STORE, R2, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R2), - MI_MATH_ALU0(ADD), - /* GPR0 = 80 * GPR0 */ - MI_MATH_ALU2(STORE, R0, ACCU), - }; - - BEGIN_BATCH(1 + ARRAY_SIZE(maths)); - OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2)); - - for (int m = 0; m < ARRAY_SIZE(maths); m++) - OUT_BATCH(maths[m]); - - ADVANCE_BATCH(); -} - -/* - * GPR0 = GPR0 & ((1ull << n) - 1); - */ -static void -keep_gpr0_lower_n_bits(struct brw_context *brw, uint32_t n) -{ - static const uint32_t maths[] = { - MI_MATH_ALU2(LOAD, SRCA, R0), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(AND), - MI_MATH_ALU2(STORE, R0, ACCU), - }; - - assert(n < 64); - brw_load_register_imm64(brw, HSW_CS_GPR(1), (1ull << n) - 1); - - BEGIN_BATCH(1 + ARRAY_SIZE(maths)); - OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2)); - - for (int m = 0; m < ARRAY_SIZE(maths); m++) - OUT_BATCH(maths[m]); - - ADVANCE_BATCH(); -} - -/* - * GPR0 = GPR0 << 30; - */ -static void -shl_gpr0_by_30_bits(struct brw_context *brw) -{ - /* First we mask 34 bits of GPR0 to prevent overflow */ - keep_gpr0_lower_n_bits(brw, 34); - - static const uint32_t shl_maths[] = { - MI_MATH_ALU2(LOAD, SRCA, R0), - MI_MATH_ALU2(LOAD, SRCB, R0), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R0, ACCU), - }; - - const uint32_t outer_count = 5; - const uint32_t inner_count = 6; - STATIC_ASSERT(outer_count * inner_count == 30); - const uint32_t cmd_len = 1 + inner_count * ARRAY_SIZE(shl_maths); - const uint32_t batch_len = cmd_len * outer_count; - - BEGIN_BATCH(batch_len); - - /* We'll emit 5 commands, each shifting GPR0 left by 6 bits, for a total of - * 30 left shifts. - */ - for (int o = 0; o < outer_count; o++) { - /* Submit one MI_MATH to shift left by 6 bits */ - OUT_BATCH(HSW_MI_MATH | (cmd_len - 2)); - for (int i = 0; i < inner_count; i++) - for (int m = 0; m < ARRAY_SIZE(shl_maths); m++) - OUT_BATCH(shl_maths[m]); - } - - ADVANCE_BATCH(); -} - -/* - * GPR0 = GPR0 >> 2; - * - * Note that the upper 30 bits of GPR0 are lost! - */ -static void -shr_gpr0_by_2_bits(struct brw_context *brw) -{ - shl_gpr0_by_30_bits(brw); - brw_load_register_reg(brw, HSW_CS_GPR(0), HSW_CS_GPR(0) + 4); - brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); -} - -/* - * GPR0 = (GPR0 == 0) ? 
0 : 1;
- */
-static void
-gpr0_to_bool(struct brw_context *brw)
-{
-   static const uint32_t maths[] = {
-      MI_MATH_ALU2(LOAD, SRCA, R0),
-      MI_MATH_ALU1(LOAD0, SRCB),
-      MI_MATH_ALU0(ADD),
-      MI_MATH_ALU2(STOREINV, R0, ZF),
-      MI_MATH_ALU2(LOAD, SRCA, R0),
-      MI_MATH_ALU2(LOAD, SRCB, R1),
-      MI_MATH_ALU0(AND),
-      MI_MATH_ALU2(STORE, R0, ACCU),
-   };
-
-   brw_load_register_imm64(brw, HSW_CS_GPR(1), 1ull);
-
-   BEGIN_BATCH(1 + ARRAY_SIZE(maths));
-   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
-   for (int m = 0; m < ARRAY_SIZE(maths); m++)
-      OUT_BATCH(maths[m]);
-
-   ADVANCE_BATCH();
-}
-
-static void
-load_overflow_data_to_cs_gprs(struct brw_context *brw,
-                              struct brw_query_object *query,
-                              int idx)
-{
-   int offset = idx * sizeof(uint64_t) * 4;
-
-   brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, offset);
-
-   offset += sizeof(uint64_t);
-   brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, offset);
-
-   offset += sizeof(uint64_t);
-   brw_load_register_mem64(brw, HSW_CS_GPR(3), query->bo, offset);
-
-   offset += sizeof(uint64_t);
-   brw_load_register_mem64(brw, HSW_CS_GPR(4), query->bo, offset);
-}
-
-/*
- * R3 = R4 - R3;
- * R1 = R2 - R1;
- * R1 = R3 - R1;
- * R0 = R0 | R1;
- */
-static void
-calc_overflow_for_stream(struct brw_context *brw)
-{
-   static const uint32_t maths[] = {
-      MI_MATH_ALU2(LOAD, SRCA, R4),
-      MI_MATH_ALU2(LOAD, SRCB, R3),
-      MI_MATH_ALU0(SUB),
-      MI_MATH_ALU2(STORE, R3, ACCU),
-      MI_MATH_ALU2(LOAD, SRCA, R2),
-      MI_MATH_ALU2(LOAD, SRCB, R1),
-      MI_MATH_ALU0(SUB),
-      MI_MATH_ALU2(STORE, R1, ACCU),
-      MI_MATH_ALU2(LOAD, SRCA, R3),
-      MI_MATH_ALU2(LOAD, SRCB, R1),
-      MI_MATH_ALU0(SUB),
-      MI_MATH_ALU2(STORE, R1, ACCU),
-      MI_MATH_ALU2(LOAD, SRCA, R1),
-      MI_MATH_ALU2(LOAD, SRCB, R0),
-      MI_MATH_ALU0(OR),
-      MI_MATH_ALU2(STORE, R0, ACCU),
-   };
-
-   BEGIN_BATCH(1 + ARRAY_SIZE(maths));
-   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
-   for (int m = 0; m < ARRAY_SIZE(maths); m++)
-      OUT_BATCH(maths[m]);
-
-   ADVANCE_BATCH();
-}
-
-static void
-calc_overflow_to_gpr0(struct brw_context *brw, struct brw_query_object *query,
-                      int count)
-{
-   brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull);
-
-   for (int i = 0; i < count; i++) {
-      load_overflow_data_to_cs_gprs(brw, query, i);
-      calc_overflow_for_stream(brw);
-   }
-}
-
-/*
- * Take a query and calculate whether there was overflow during transform
- * feedback. Store the result in the gpr0 register.
- */
-void
-hsw_overflow_result_to_gpr0(struct brw_context *brw,
-                            struct brw_query_object *query,
-                            int count)
-{
-   calc_overflow_to_gpr0(brw, query, count);
-   gpr0_to_bool(brw);
-}
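The MI_MATH helpers above work around a command streamer ALU that can only load, add, and do bitwise ops, with no multiplier or shifter. A plain C model of the three tricks (my illustration, not driver code):

```c
#include <stdint.h>

/* mult_gpr0_by_80: 16x via four ADD-doublings, 64x via two more,
 * then 80x = 16x + 64x. */
static uint64_t
mult_by_80(uint64_t gpr0)
{
   uint64_t x16 = gpr0 << 4;
   uint64_t x64 = x16 << 2;
   return x16 + x64;
}

/* shr_gpr0_by_2_bits: mask to 34 bits so 30 doublings cannot overflow,
 * "shift" left by 30 with ADDs, then keep only the upper dword. */
static uint64_t
shr_by_2(uint64_t gpr0)
{
   uint64_t v = (gpr0 & ((1ull << 34) - 1)) << 30;
   return v >> 32;
}

/* calc_overflow_for_stream: with (r1, r2) and (r3, r4) holding begin/end
 * snapshots of the two counters, OR the difference of the deltas into r0;
 * any stream where the deltas disagree leaves r0 nonzero. */
static uint64_t
stream_overflow(uint64_t r0, uint64_t r1, uint64_t r2,
                uint64_t r3, uint64_t r4)
{
   return r0 | ((r4 - r3) - (r2 - r1));
}
```

gpr0_to_bool then collapses the accumulated value to 0 or 1, which is exactly the Boolean that the overflow queries must return.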
-
-static void
-hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
-                   struct gl_buffer_object *buf, intptr_t offset,
-                   GLenum pname, GLenum ptype)
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(query->bo);
-   assert(pname != GL_QUERY_TARGET);
-
-   if (pname == GL_QUERY_RESULT_AVAILABLE) {
-      /* The query result availability is stored at offset
-       * 2 * sizeof(uint64_t) of the buffer.
-       */
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(0),
-                              query->bo,
-                              2 * sizeof(uint64_t));
-      return;
-   }
-
-   if (pname == GL_QUERY_RESULT) {
-      /* Since GL_QUERY_RESULT_NO_WAIT wasn't used, they want us to stall to
-       * make sure the query is available.
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_CS_STALL |
-                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
-   }
-
-   if (query->Base.Target == GL_TIMESTAMP) {
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(0),
-                              query->bo,
-                              0 * sizeof(uint64_t));
-   } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB
-              || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) {
-      /* Don't do anything in advance here, since the math for this is a little
-       * more complex.
-       */
-   } else {
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(1),
-                              query->bo,
-                              0 * sizeof(uint64_t));
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(2),
-                              query->bo,
-                              1 * sizeof(uint64_t));
-
-      BEGIN_BATCH(5);
-      OUT_BATCH(HSW_MI_MATH | (5 - 2));
-
-      OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
-      OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
-      OUT_BATCH(MI_MATH_ALU0(SUB));
-      OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
-
-      ADVANCE_BATCH();
-   }
-
-   switch (query->Base.Target) {
-   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
-      /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
-       * "Invocation counter is 4 times actual. WA: SW to divide HW reported
-       * PS Invocations value by 4."
-       *
-       * Prior to Haswell, invocations were counted by the WM in units of
-       * subspans (2x2 units), so the CS multiplied that value by 4 to get
-       * the correct count. On Haswell the logic moved and the hardware began
-       * reporting the true number of pixel shader invocations, but the
-       * multiply by 4 was never removed.
-       */
-      if (devinfo->ver == 8 || devinfo->platform == INTEL_PLATFORM_HSW)
-         shr_gpr0_by_2_bits(brw);
-      break;
-   case GL_TIME_ELAPSED:
-   case GL_TIMESTAMP:
-      mult_gpr0_by_80(brw);
-      if (query->Base.Target == GL_TIMESTAMP) {
-         keep_gpr0_lower_n_bits(brw, 36);
-      }
-      break;
-   case GL_ANY_SAMPLES_PASSED:
-   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-      gpr0_to_bool(brw);
-      break;
-   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
-      hsw_overflow_result_to_gpr0(brw, query, 1);
-      break;
-   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
-      hsw_overflow_result_to_gpr0(brw, query, MAX_VERTEX_STREAMS);
-      break;
-   }
-}
-
-/*
- * Store immediate data into the user buffer using the requested size.
- */
-static void
-store_query_result_imm(struct brw_context *brw, struct brw_bo *bo,
-                       uint32_t offset, GLenum ptype, uint64_t imm)
-{
-   switch (ptype) {
-   case GL_INT:
-   case GL_UNSIGNED_INT:
-      brw_store_data_imm32(brw, bo, offset, imm);
-      break;
-   case GL_INT64_ARB:
-   case GL_UNSIGNED_INT64_ARB:
-      brw_store_data_imm64(brw, bo, offset, imm);
-      break;
-   default:
-      unreachable("Unexpected result type");
-   }
-}
-
-static void
-set_predicate(struct brw_context *brw, struct brw_bo *query_bo)
-{
-   brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
-
-   /* Load query availability into SRC0 */
-   brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo,
-                           2 * sizeof(uint64_t));
-
-   /* predicate = !(query_availability == 0); */
-   BEGIN_BATCH(1);
-   OUT_BATCH(GFX7_MI_PREDICATE |
-             MI_PREDICATE_LOADOP_LOADINV |
-             MI_PREDICATE_COMBINEOP_SET |
-             MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
-   ADVANCE_BATCH();
-}
-
-/*
- * Store data from the register into the user buffer using the requested size.
- * The write also enables the predication to prevent writing the result if the
- * query has not finished yet.
- */
-static void
-store_query_result_reg(struct brw_context *brw, struct brw_bo *bo,
-                       uint32_t offset, GLenum ptype, uint32_t reg,
-                       const bool pipelined)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   uint32_t cmd_size = devinfo->ver >= 8 ? 4 : 3;
-   uint32_t dwords = (ptype == GL_INT || ptype == GL_UNSIGNED_INT) ? 1 : 2;
-   assert(devinfo->ver >= 6);
-
-   BEGIN_BATCH(dwords * cmd_size);
-   for (int i = 0; i < dwords; i++) {
-      OUT_BATCH(MI_STORE_REGISTER_MEM |
-                (pipelined ? MI_STORE_REGISTER_MEM_PREDICATE : 0) |
-                (cmd_size - 2));
-      OUT_BATCH(reg + 4 * i);
-      if (devinfo->ver >= 8) {
-         OUT_RELOC64(bo, RELOC_WRITE, offset + 4 * i);
-      } else {
-         OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + 4 * i);
-      }
-   }
-   ADVANCE_BATCH();
-}
-
-static void
-hsw_store_query_result(struct gl_context *ctx, struct gl_query_object *q,
-                       struct gl_buffer_object *buf, intptr_t offset,
-                       GLenum pname, GLenum ptype)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   struct brw_buffer_object *bo = brw_buffer_object(buf);
-   const bool pipelined = brw_is_query_pipelined(query);
-
-   if (pname == GL_QUERY_TARGET) {
-      store_query_result_imm(brw, bo->buffer, offset, ptype,
-                             query->Base.Target);
-      return;
-   } else if (pname == GL_QUERY_RESULT_AVAILABLE && !pipelined) {
-      store_query_result_imm(brw, bo->buffer, offset, ptype, 1ull);
-   } else if (query->bo) {
-      /* The query bo is still around. Therefore, we:
-       *
-       *    1. Compute the current result in GPR0
-       *    2. Set the command streamer predicate based on query availability
-       *    3. (With predication) Write GPR0 to the requested buffer
-       */
-      hsw_result_to_gpr0(ctx, query, buf, offset, pname, ptype);
-      if (pipelined)
-         set_predicate(brw, query->bo);
-      store_query_result_reg(brw, bo->buffer, offset, ptype, HSW_CS_GPR(0),
-                             pipelined);
-   } else {
-      /* The query bo is gone, so the query must have been processed into
-       * client memory. In this case we can fill the buffer location with the
-       * requested data using MI_STORE_DATA_IMM.
-       */
-      switch (pname) {
-      case GL_QUERY_RESULT_AVAILABLE:
-         store_query_result_imm(brw, bo->buffer, offset, ptype, 1ull);
-         break;
-      case GL_QUERY_RESULT_NO_WAIT:
-      case GL_QUERY_RESULT:
-         store_query_result_imm(brw, bo->buffer, offset, ptype,
-                                q->Result);
-         break;
-      default:
-         unreachable("Unexpected result type");
-      }
-   }
-
-}
-
-/* Initialize hsw+-specific query object functions. */
-void hsw_init_queryobj_functions(struct dd_function_table *functions)
-{
-   gfx6_init_queryobj_functions(functions);
-   functions->StoreQueryResult = hsw_store_query_result;
-}
diff --git a/src/mesa/drivers/dri/i965/hsw_sol.c b/src/mesa/drivers/dri/i965/hsw_sol.c
deleted file mode 100644
index 8d801e5..0000000
--- a/src/mesa/drivers/dri/i965/hsw_sol.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * An implementation of the transform feedback driver hooks for Haswell - * and later hardware. This uses MI_MATH to compute the number of vertices - * written (for use by DrawTransformFeedback()) without any CPU<->GPU - * synchronization which could stall. - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "main/transformfeedback.h" - -/** - * We store several values in obj->prim_count_bo: - * - * [4x 32-bit values]: Final Number of Vertices Written - * [4x 32-bit values]: Tally of Primitives Written So Far - * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots - * - * The first set of values is used by DrawTransformFeedback(), which - * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs - * an indirect draw. The other values are just temporary storage. - */ - -#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t)) -#define START_OFFSET (TALLY_OFFSET * 2) - -/** - * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) - * to prim_count_bo. - */ -static void -save_prim_start_values(struct brw_context *brw, - struct brw_transform_feedback_object *obj) -{ - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - brw_store_register_mem64(brw, obj->prim_count_bo, - GFX7_SO_NUM_PRIMS_WRITTEN(i), - START_OFFSET + i * sizeof(uint64_t)); - } -} - -/** - * Compute the number of primitives written during our most recent - * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value - * minus the stashed "start" value), and add it to our running tally. - * - * If \p finalize is true, also compute the number of vertices written - * (by multiplying by the number of vertices per primitive), and store - * that to the "final" location. - * - * Otherwise, just overwrite the old tally with the new one. - */ -static void -tally_prims_written(struct brw_context *brw, - struct brw_transform_feedback_object *obj, - bool finalize) -{ - /* Flush any drawing so that the counters have the right values. 
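Because prim_count_bo is addressed purely through byte offsets, a struct view of the layout described above may help. This is an illustration only; it assumes BRW_MAX_XFB_STREAMS is 4, which is the value the offsets imply (TALLY_OFFSET = 16, START_OFFSET = 32):

```c
#include <stdint.h>

#define BRW_MAX_XFB_STREAMS 4 /* assumed from the offsets above */

/* Struct view of obj->prim_count_bo (illustrative, not driver code). */
struct prim_count_layout {
   uint32_t final_vertex_count[BRW_MAX_XFB_STREAMS];  /* offset  0 */
   uint32_t prims_written_tally[BRW_MAX_XFB_STREAMS]; /* offset 16 */
   uint64_t start_snapshot[BRW_MAX_XFB_STREAMS];      /* offset 32 */
};
```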
*/ - brw_emit_mi_flush(brw); - - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - /* GPR0 = Tally */ - brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); - brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo, - TALLY_OFFSET + i * sizeof(uint32_t)); - if (!obj->base.Paused) { - /* GPR1 = Start Snapshot */ - brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, - START_OFFSET + i * sizeof(uint64_t)); - /* GPR2 = Ending Snapshot */ - brw_load_register_reg64(brw, HSW_CS_GPR(2), - GFX7_SO_NUM_PRIMS_WRITTEN(i)); - - BEGIN_BATCH(9); - OUT_BATCH(HSW_MI_MATH | (9 - 2)); - /* GPR1 = GPR2 (End) - GPR1 (Start) */ - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); - OUT_BATCH(MI_MATH_ALU0(SUB)); - OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); - /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */ - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); - ADVANCE_BATCH(); - } - - if (!finalize) { - /* Write back the new tally */ - brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), - TALLY_OFFSET + i * sizeof(uint32_t)); - } else { - /* Convert the number of primitives to the number of vertices. */ - if (obj->primitive_mode == GL_LINES) { - /* Double R0 (R0 = R0 + R0) */ - BEGIN_BATCH(5); - OUT_BATCH(HSW_MI_MATH | (5 - 2)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); - ADVANCE_BATCH(); - } else if (obj->primitive_mode == GL_TRIANGLES) { - /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */ - BEGIN_BATCH(9); - OUT_BATCH(HSW_MI_MATH | (9 - 2)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); - ADVANCE_BATCH(); - } - /* Store it to the final result */ - brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), - i * sizeof(uint32_t)); - } - } -} - -/** - * BeginTransformFeedback() driver hook. - */ -void -hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_obj->primitive_mode = mode; - - /* Reset the SO buffer offsets to 0. */ - if (devinfo->ver >= 8) { - brw_obj->zero_offsets = true; - } else { - BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2)); - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_BATCH(0); - } - ADVANCE_BATCH(); - } - - /* Zero out the initial tallies */ - brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull); - brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull); - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - save_prim_start_values(brw, brw_obj); -} - -/** - * PauseTransformFeedback() driver hook. 
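Summarizing the MI_MATH in tally_prims_written as ordinary arithmetic (again an illustration, not driver code):

```c
#include <stdbool.h>
#include <stdint.h>

/* One stream's update: GPR0 = tally, GPR1 = start snapshot,
 * GPR2 = current SO_NUM_PRIMS_WRITTEN. Paused objects skip the delta. */
static uint32_t
update_tally(uint32_t tally, uint64_t start, uint64_t end, bool paused)
{
   if (!paused)
      tally += (uint32_t)(end - start); /* R0 += R2 - R1 */
   return tally;
}

/* Finalize: primitives to vertices. Lines double the count (R0 += R0),
 * triangles triple it (R1 = R0 + R0; R0 += R1), points pass through. */
static uint32_t
prims_to_vertices(uint32_t prims, unsigned verts_per_prim)
{
   return prims * verts_per_prim; /* 1, 2, or 3 */
}
```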
- */ -void -hsw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - /* Save the SOL buffer offset register values. */ - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - } - - /* Add any primitives written to our tally */ - tally_prims_written(brw, brw_obj, false); -} - -/** - * ResumeTransformFeedback() driver hook. - */ -void -hsw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* Reload the SOL buffer offset registers. */ - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - BEGIN_BATCH(3); - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - } - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - save_prim_start_values(brw, brw_obj); -} - -/** - * EndTransformFeedback() driver hook. - */ -void -hsw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Add any primitives written to our tally, convert it from the number - * of primitives written to the number of vertices written, and store - * it in the "final" location in the buffer which DrawTransformFeedback() - * will use as the vertex count. - */ - tally_prims_written(brw, brw_obj, true); -} diff --git a/src/mesa/drivers/dri/i965/libdrm_macros.h b/src/mesa/drivers/dri/i965/libdrm_macros.h deleted file mode 100644 index 2cb76d5..0000000 --- a/src/mesa/drivers/dri/i965/libdrm_macros.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright © 2014 NVIDIA Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef LIBDRM_LIBDRM_H
-#define LIBDRM_LIBDRM_H
-
-#include "util/macros.h"
-
-
-#include <sys/mman.h>
-
-#if defined(ANDROID) && !defined(__LP64__)
-/* 32-bit needs mmap64 for 64-bit offsets */
-# define drm_mmap(addr, length, prot, flags, fd, offset) \
-       mmap64(addr, length, prot, flags, fd, offset)
-
-# define drm_munmap(addr, length) \
-       munmap(addr, length)
-
-#else
-
-/* assume large file support exists */
-# define drm_mmap(addr, length, prot, flags, fd, offset) \
-       mmap(addr, length, prot, flags, fd, offset)
-
-
-static inline int drm_munmap(void *addr, size_t length)
-{
-   /* Copied from configure code generated by AC_SYS_LARGEFILE */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \
-                     (((off_t) 1 << 31) << 31))
-   STATIC_ASSERT(LARGE_OFF_T % 2147483629 == 721 &&
-                 LARGE_OFF_T % 2147483647 == 1);
-#undef LARGE_OFF_T
-
-   return munmap(addr, length);
-}
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
deleted file mode 100644
index 25bcd1a..0000000
--- a/src/mesa/drivers/dri/i965/meson.build
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright © 2017 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
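One aside on the libdrm_macros.h removal above: the drm_mmap indirection exists because 32-bit Android has a 32-bit off_t, so a buffer offset at or above 2 GiB would be truncated, and mmap64 must be used instead. A usage sketch (the wrapper and flag choices are illustrative, not the driver's):

```c
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* Map a buffer object at a 64-bit offset. On 32-bit Android, plain mmap
 * would truncate offsets that do not fit in a 32-bit off_t. */
static void *
map_bo(int fd, uint64_t offset, size_t length)
{
#if defined(ANDROID) && !defined(__LP64__)
   return mmap64(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
                 fd, (off64_t)offset);
#else
   return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
               fd, (off_t)offset);
#endif
}
```

The drm_munmap half of the pair exists mainly to carry the AC_SYS_LARGEFILE static assertion on non-Android builds; the actual munmap call is the same either way.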
- -files_i965 = files( - 'brw_binding_tables.c', - 'brw_blorp.c', - 'brw_blorp.h', - 'brw_bufmgr.c', - 'brw_bufmgr.h', - 'brw_clear.c', - 'brw_clip.c', - 'brw_compute.c', - 'brw_conditional_render.c', - 'brw_context.c', - 'brw_context.h', - 'brw_cs.c', - 'brw_cs.h', - 'brw_curbe.c', - 'brw_defines.h', - 'brw_disk_cache.c', - 'brw_draw.c', - 'brw_draw.h', - 'brw_draw_upload.c', - 'brw_ff_gs.c', - 'brw_ff_gs.h', - 'brw_formatquery.c', - 'brw_generate_mipmap.c', - 'brw_gs.c', - 'brw_gs.h', - 'brw_gs_surface_state.c', - 'brw_link.cpp', - 'brw_meta_util.c', - 'brw_meta_util.h', - 'brw_misc_state.c', - 'brw_multisample_state.h', - 'brw_nir_uniforms.cpp', - 'brw_object_purgeable.c', - 'brw_pipe_control.c', - 'brw_performance_query.c', - 'brw_program.c', - 'brw_program.h', - 'brw_program_binary.c', - 'brw_program_cache.c', - 'brw_primitive_restart.c', - 'brw_queryobj.c', - 'brw_reset.c', - 'brw_sf.c', - 'brw_state.h', - 'brw_state_upload.c', - 'brw_structs.h', - 'brw_surface_formats.c', - 'brw_sync.c', - 'brw_tcs.c', - 'brw_tcs_surface_state.c', - 'brw_tes.c', - 'brw_tes_surface_state.c', - 'brw_urb.c', - 'brw_util.c', - 'brw_util.h', - 'brw_vs.c', - 'brw_vs.h', - 'brw_vs_surface_state.c', - 'brw_wm.c', - 'brw_wm.h', - 'brw_wm_surface_state.c', - 'gfx4_blorp_exec.h', - 'gfx6_clip_state.c', - 'gfx6_constant_state.c', - 'gfx6_multisample_state.c', - 'gfx6_queryobj.c', - 'gfx6_sampler_state.c', - 'gfx6_sol.c', - 'gfx6_urb.c', - 'gfx7_l3_state.c', - 'gfx7_sol_state.c', - 'gfx7_urb.c', - 'gfx8_depth_state.c', - 'gfx8_multisample_state.c', - 'hsw_queryobj.c', - 'hsw_sol.c', - 'brw_batch.c', - 'brw_batch.h', - 'brw_blit.c', - 'brw_blit.h', - 'brw_buffer_objects.c', - 'brw_buffer_objects.h', - 'brw_buffers.c', - 'brw_buffers.h', - 'brw_copy_image.c', - 'brw_extensions.c', - 'brw_fbo.c', - 'brw_fbo.h', - 'brw_image.h', - 'brw_mipmap_tree.c', - 'brw_mipmap_tree.h', - 'brw_pixel_bitmap.c', - 'brw_pixel.c', - 'brw_pixel_copy.c', - 'brw_pixel_draw.c', - 'brw_pixel.h', - 'brw_pixel_read.c', - 'brw_screen.c', - 'brw_screen.h', - 'brw_state.c', - 'brw_tex.c', - 'brw_tex_copy.c', - 'brw_tex.h', - 'brw_tex_image.c', - 'brw_tex_obj.h', - 'brw_tex_validate.c', - 'brw_upload.c', - 'libdrm_macros.h', -) - -i965_per_hw_ver_libs = [] -foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '110'] - i965_per_hw_ver_libs += static_library( - 'i965_per_hw_ver@0@'.format(v), - ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c', - 'genX_state_upload.c', gen_xml_pack], - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_dri_common], - c_args : [ - no_override_init_args, c_sse2_args, - '-DGFX_VERx10=@0@'.format(v), - ], - gnu_symbol_visibility : 'hidden', - dependencies : [dep_libdrm, idep_nir_headers, idep_mesautil], - ) -endforeach - - -libi965 = static_library( - 'i965', - [files_i965, ir_expression_operation_h], - include_directories : [ - inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_dri_common, - ], - c_args : [no_override_init_args, c_sse2_args], - cpp_args : [c_sse2_args], - gnu_symbol_visibility : 'hidden', - link_with : [ - i965_per_hw_ver_libs, libintel_dev, libisl, libintel_compiler, libblorp, - libintel_perf - ], - dependencies : [ - dep_libdrm, dep_valgrind, idep_libintel_common, idep_nir_headers, idep_genxml, - idep_xmlconfig, - ], -) diff --git a/src/mesa/drivers/dri/meson.build b/src/mesa/drivers/dri/meson.build index f6efd96..cea38dc 100644 --- a/src/mesa/drivers/dri/meson.build +++ b/src/mesa/drivers/dri/meson.build @@ 
-22,11 +22,6 @@ subdir('common') _dri_drivers = [] _dri_link = [] -if with_dri_i965 - subdir('i965') - _dri_drivers += libi965 - _dri_link += 'i965_dri.so' -endif if _dri_drivers != [] libmesa_dri_drivers = shared_library( -- 2.7.4