-Wno-error=uninitialized
CPP_ARGS: >
-Wno-error=array-bounds
- DRI_DRIVERS: "i965"
DRI_LOADERS: >
-D glx=dri
-D gbm=enabled
-Wno-error=unused-variable
DRI_LOADERS: >
-D glvnd=true
- DRI_DRIVERS: "auto"
GALLIUM_DRIVERS: "iris,nouveau,kmsro,r300,r600,freedreno,swr,swrast,svga,v3d,vc4,virgl,etnaviv,panfrost,lima,zink,radeonsi,tegra,d3d12,crocus"
VULKAN_DRIVERS: intel,amd,freedreno,broadcom,virtio-experimental
CC: clang
CROSS: i386
VULKAN_DRIVERS: intel,amd,swrast,virtio-experimental
GALLIUM_DRIVERS: "iris,nouveau,r300,r600,radeonsi,swrast,virgl,zink,crocus"
- DRI_DRIVERS: "i965"
EXTRA_OPTION: >
-D vulkan-layers=device-select,overlay
-Wno-error=format
-Wno-error=format-extra-args
CPP_ARGS: $C_ARGS
- DRI_DRIVERS: ""
GALLIUM_DRIVERS: "swrast"
EXTRA_OPTION: >
-Dllvm=disabled
-D cpp_args="$(echo -n $CPP_ARGS)" \
-D libunwind=${UNWIND} \
${DRI_LOADERS} \
- -D dri-drivers=${DRI_DRIVERS:-[]} \
${GALLIUM_ST} \
-D gallium-drivers=${GALLIUM_DRIVERS:-[]} \
-D vulkan-drivers=${VULKAN_DRIVERS:-[]} \
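# Note: with the classic drivers gone, the configure invocation above simply
# drops the dri-drivers option. A minimal sketch of an equivalent hand-run
# build, assuming locally chosen driver lists rather than the CI defaults:
#
#   meson setup build \
#     -D gallium-drivers=iris,crocus,swrast \
#     -D vulkan-drivers=intel \
#     -D glx=dri -D gbm=enabled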
+++ /dev/null
-#ifndef PREFER_CROCUS
-CHIPSET(0x29A2, i965, "BW", "Intel(R) 965G")
-CHIPSET(0x2992, i965, "BW", "Intel(R) 965Q")
-CHIPSET(0x2982, i965, "BW", "Intel(R) 965G")
-CHIPSET(0x2972, i965, "BW", "Intel(R) 946GZ")
-CHIPSET(0x2A02, i965, "CL", "Intel(R) 965GM")
-CHIPSET(0x2A12, i965, "CL", "Intel(R) 965GME/GLE")
-
-CHIPSET(0x2A42, g4x, "CTG", "Mobile Intel® GM45 Express Chipset")
-CHIPSET(0x2E02, g4x, "ELK", "Intel(R) Integrated Graphics Device")
-CHIPSET(0x2E12, g4x, "ELK", "Intel(R) Q45/Q43")
-CHIPSET(0x2E22, g4x, "ELK", "Intel(R) G45/G43")
-CHIPSET(0x2E32, g4x, "ELK", "Intel(R) G41")
-CHIPSET(0x2E42, g4x, "ELK", "Intel(R) B43")
-CHIPSET(0x2E92, g4x, "ELK", "Intel(R) B43")
-
-CHIPSET(0x0042, ilk, "ILK", "Intel(R) HD Graphics")
-CHIPSET(0x0046, ilk, "ILK", "Intel(R) HD Graphics")
-
-CHIPSET(0x0102, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
-CHIPSET(0x0112, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x0122, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x0106, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
-CHIPSET(0x0116, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x0126, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x010A, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
-
-CHIPSET(0x0152, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics 2500")
-CHIPSET(0x0162, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")
-CHIPSET(0x0156, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics 2500")
-CHIPSET(0x0166, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")
-CHIPSET(0x015a, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics")
-CHIPSET(0x016a, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics P4000")
-
-CHIPSET(0x0402, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0412, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
-CHIPSET(0x0422, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0406, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0416, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
-CHIPSET(0x0426, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x040A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x041A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics P4600/P4700")
-CHIPSET(0x042A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x040B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x041B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x042B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x040E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x041E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400")
-CHIPSET(0x042E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0A22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400")
-CHIPSET(0x0A26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics 5000")
-CHIPSET(0x0A0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0A2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0A2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4200")
-CHIPSET(0x0A2E, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Graphics 5100")
-CHIPSET(0x0D02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
-CHIPSET(0x0D22, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics 5200")
-CHIPSET(0x0D06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D26, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics P5200")
-CHIPSET(0x0D0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0D0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0D0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-
-CHIPSET(0x0F31, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0F32, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0F33, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0157, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0155, byt, "BYT", "Intel(R) HD Graphics")
-
-CHIPSET(0x22B0, chv, "CHV", "Intel(R) HD Graphics")
-CHIPSET(0x22B1, chv, "BSW", "Intel(R) HD Graphics XXX") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv, "CHV", "Intel(R) HD Graphics")
-CHIPSET(0x22B3, chv, "CHV", "Intel(R) HD Graphics")
-#endif
-
-#ifndef PREFER_IRIS
-CHIPSET(0x1602, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x1606, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160A, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160B, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160D, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160E, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x1612, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5600")
-CHIPSET(0x1616, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5500")
-CHIPSET(0x161A, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics P5700")
-CHIPSET(0x161B, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x161D, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x161E, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5300")
-CHIPSET(0x1622, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics 6200")
-CHIPSET(0x1626, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics 6000")
-CHIPSET(0x162A, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics P6300")
-CHIPSET(0x162B, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Graphics 6100")
-CHIPSET(0x162D, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x162E, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics")
-
-CHIPSET(0x1902, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
-CHIPSET(0x1906, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
-CHIPSET(0x190A, skl_gt1, "SKL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x190B, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
-CHIPSET(0x190E, skl_gt1, "SKL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x1912, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530")
-CHIPSET(0x1913, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
-CHIPSET(0x1915, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
-CHIPSET(0x1916, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520")
-CHIPSET(0x1917, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
-CHIPSET(0x191A, skl_gt2, "SKL GT2", "Intel(R) HD Graphics")
-CHIPSET(0x191B, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530")
-CHIPSET(0x191D, skl_gt2, "SKL GT2", "Intel(R) HD Graphics P530")
-CHIPSET(0x191E, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 515")
-CHIPSET(0x1921, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520")
-CHIPSET(0x1923, skl_gt3, "SKL GT3", "Intel(R) HD Graphics 535")
-CHIPSET(0x1926, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 540")
-CHIPSET(0x1927, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 550")
-CHIPSET(0x192A, skl_gt4, "SKL GT4", "Intel(R) HD Graphics")
-CHIPSET(0x192B, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 555")
-CHIPSET(0x192D, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics P555")
-CHIPSET(0x1932, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580")
-CHIPSET(0x193A, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580")
-CHIPSET(0x193B, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580")
-CHIPSET(0x193D, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580")
-
-CHIPSET(0x0A84, bxt, "BXT 3", "Intel(R) HD Graphics")
-CHIPSET(0x1A84, bxt, "BXT 3", "Intel(R) HD Graphics")
-CHIPSET(0x1A85, bxt_2x6, "BXT 2", "Intel(R) HD Graphics")
-CHIPSET(0x5A84, bxt, "APL 3", "Intel(R) HD Graphics 505")
-CHIPSET(0x5A85, bxt_2x6, "APL 2", "Intel(R) HD Graphics 500")
-
-CHIPSET(0x3184, glk, "GLK 3", "Intel(R) UHD Graphics 605")
-CHIPSET(0x3185, glk_2x6, "GLK 2", "Intel(R) UHD Graphics 600")
-
-CHIPSET(0x5902, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
-CHIPSET(0x5906, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
-CHIPSET(0x590A, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x5908, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x590B, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
-CHIPSET(0x590E, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x5913, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x5915, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x5917, kbl_gt2, "KBL GT2", "Intel(R) UHD Graphics 620")
-CHIPSET(0x5912, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630")
-CHIPSET(0x5916, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 620")
-CHIPSET(0x591A, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630")
-CHIPSET(0x591B, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630")
-CHIPSET(0x591D, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630")
-CHIPSET(0x591E, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 615")
-CHIPSET(0x5921, kbl_gt2, "KBL GT2F", "Intel(R) HD Graphics 620")
-CHIPSET(0x5923, kbl_gt3, "KBL GT3", "Intel(R) HD Graphics 635")
-CHIPSET(0x5926, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 640 (Kaby Lake GT3e)")
-CHIPSET(0x5927, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 650 (Kaby Lake GT3e)")
-CHIPSET(0x593B, kbl_gt4, "KBL GT4", "Intel(R) HD Graphics")
-
-CHIPSET(0x591C, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 615")
-CHIPSET(0x87C0, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 617")
-
-CHIPSET(0x87CA, cfl_gt2, "AML-CFL", "Intel(R) UHD Graphics")
-
-CHIPSET(0x3E90, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E93, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E99, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E9C, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E91, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E92, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E96, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x3E98, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E9A, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x3E9B, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E94, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x3EA9, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 620")
-CHIPSET(0x3EA5, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655")
-CHIPSET(0x3EA6, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 645")
-CHIPSET(0x3EA7, cfl_gt3, "CFL GT3", "Intel(R) HD Graphics")
-CHIPSET(0x3EA8, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655")
-
-CHIPSET(0x3EA1, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3EA4, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x3EA0, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics 620")
-CHIPSET(0x3EA3, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x3EA2, cfl_gt3, "WHL GT3", "Intel(R) UHD Graphics")
-
-CHIPSET(0x9B21, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA0, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA2, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA4, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA5, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x9BA8, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x9BAA, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BAB, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BAC, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9B41, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC0, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC2, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC4, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC5, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x9BC6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x9BC8, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x9BCA, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BCB, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BCC, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BE6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x9BF6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-
-CHIPSET(0x8A50, icl_gt2, "ICL GT2", "Intel(R) HD Graphics")
-CHIPSET(0x8A51, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A52, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A53, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A54, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A56, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A57, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A58, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A59, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A5A, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5B, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A5C, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5D, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A71, icl_gt0_5, "ICL GT0.5", "Intel(R) HD Graphics")
-
-CHIPSET(0x4500, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4541, ehl_2x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4551, ehl_4x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4555, ehl_2x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4557, ehl_4x5, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4571, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E51, ehl_4x4, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E55, ehl_2x8, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E57, ehl_4x5, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E61, ehl_4x6, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E71, ehl_4x8, "JSL", "Intel(R) UHD Graphics")
-#endif
system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux', 'sunos'].contains(host_machine.system())
dri_drivers = get_option('dri-drivers')
-if dri_drivers.contains('auto')
- if system_has_kms_drm
- # TODO: PPC, Sparc
- if ['x86', 'x86_64'].contains(host_machine.cpu_family())
- dri_drivers = ['i965']
- elif ['arm', 'aarch64', 'mips', 'mips64'].contains(host_machine.cpu_family())
- dri_drivers = []
- else
- error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
- host_machine.cpu_family()))
- endif
- elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
- # only swrast would make sense here, but gallium swrast is a much better default
- dri_drivers = []
- else
- error('Unknown OS @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
- host_machine.system()))
- endif
+if dri_drivers.length() != 0
+ error('Mesa\'s main branch no longer has any "classic" drivers, use the "amber" branch instead.')
endif
-with_dri_i965 = dri_drivers.contains('i965')
-
with_dri = dri_drivers.length() != 0
gallium_drivers = get_option('gallium-drivers')
if ['x86', 'x86_64'].contains(host_machine.cpu_family())
gallium_drivers = [
'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'svga', 'swrast',
- 'iris', 'crocus'
+ 'iris', 'crocus', 'i915'
]
elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
gallium_drivers = [
with_any_vk = _vulkan_drivers.length() != 0
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
+with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus
if with_swrast_vk and not with_gallium_softpipe
error('swrast vulkan requires gallium swrast')
pre_args += '-DHAVE_DL_ITERATE_PHDR'
elif with_intel_vk
error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
-elif with_dri_i965 and with_shader_cache
- error('Intel i965 GL driver requires dl_iterate_phdr when built with shader caching.')
endif
# Determine whether or not the rt library is needed for time functions
option(
'dri-drivers',
type : 'array',
- value : ['auto'],
- choices : ['auto', 'i965'],
- description : 'List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
+ description : 'DEPRECATED: List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
)
option(
'dri-drivers-path',
value : true,
description : 'Enable direct rendering in GLX and EGL for DRI',
)
-option(
- 'prefer-iris',
- type : 'boolean',
- value : true,
- description : 'Prefer new Intel iris driver over older i965 driver'
-)
-option(
- 'prefer-crocus',
- type : 'boolean',
- value : false,
- description : 'Prefer new crocus driver over older i965 driver for gen4-7'
-)
option('egl-lib-suffix',
type : 'string',
value : '',
#undef CHIPSET
#define CHIPSET(id, family, fam_str, name) \
case id: *devinfo = intel_device_info_##family; break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
#undef CHIPSET
sizeof(devinfo->name)); \
strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
default:
strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));
} chipsets[] = {
#undef CHIPSET
#define CHIPSET(id, family, family_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/crocus_pci_ids.h"
-#include "pci_ids/i965_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
-#undef CHIPSET
-#define CHIPSET(id, fam_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/i915_pci_ids.h"
-#undef CHIPSET
+#include "pci_ids/crocus_pci_ids.h"
};
for (uint32_t i = 0; i < ARRAY_SIZE(chipsets); i++) {
'-DUSE_DRICONF',
'-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
]
-
-if get_option('prefer-iris')
- loader_c_args += ['-DPREFER_IRIS']
-endif
-
-if get_option('prefer-crocus')
- loader_c_args += ['-DPREFER_CROCUS']
-endif
-
libloader = static_library(
'loader',
['loader_dri_helper.c', 'loader.c'],
# error "Only include from loader.c"
#endif
-static const int i965_chip_ids[] = {
-#define CHIPSET(chip, family, family_str, name) chip,
-#include "pci_ids/i965_pci_ids.h"
-#undef CHIPSET
-};
-
static const int crocus_chip_ids[] = {
#define CHIPSET(chip, family, family_str, name) chip,
#include "pci_ids/crocus_pci_ids.h"
int num_chips_ids;
bool (*predicate)(int fd);
} driver_map[] = {
- { 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
{ 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
{ 0x8086, "iris", NULL, -1, is_kernel_i915 },
{ 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },
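/* With i965 removed, the devices listed in crocus_pci_ids.h are matched to
 * crocus through the explicit table above, while anything else bound to the
 * i915 kernel driver falls through to iris via the is_kernel_i915 predicate.
 * For testing, a driver can still be forced at runtime; a hypothetical
 * invocation using the loader's existing override variable:
 *
 *   MESA_LOADER_DRIVER_OVERRIDE=crocus glxinfo | grep "OpenGL renderer"
 */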
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_bufmgr.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "common/intel_decoder.h"
-#include "common/intel_gem.h"
-
-#include "util/hash_table.h"
-
-#include <xf86drm.h>
-#include "drm-uapi/i915_drm.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BUFMGR
-
-/**
- * Target sizes of the batch and state buffers. We create the initial
- * buffers at these sizes, and flush when they're nearly full. If we
- * underestimate how close we are to the end, and suddenly need more space
- * in the middle of a draw, we can grow the buffers, and finish the draw.
- * At that point, we'll be over our target size, so the next operation
- * should flush. Each time we flush the batch, we recreate both buffers
- * at the original target size, so it doesn't grow without bound.
- */
-#define BATCH_SZ (20 * 1024)
-#define STATE_SZ (16 * 1024)
-
-static void
-brw_batch_reset(struct brw_context *brw);
-static void
-brw_new_batch(struct brw_context *brw);
-
-static unsigned
-num_fences(struct brw_batch *batch)
-{
- return util_dynarray_num_elements(&batch->exec_fences,
- struct drm_i915_gem_exec_fence);
-}
-
-
-static void
-dump_validation_list(struct brw_batch *batch)
-{
- fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
-
- for (int i = 0; i < batch->exec_count; i++) {
- uint64_t flags = batch->validation_list[i].flags;
- assert(batch->validation_list[i].handle ==
- batch->exec_bos[i]->gem_handle);
- fprintf(stderr, "[%2d]: %2d %-14s %p %s%-7s @ 0x%"PRIx64"%s (%"PRIu64"B)\n",
- i,
- batch->validation_list[i].handle,
- batch->exec_bos[i]->name,
- batch->exec_bos[i],
- (flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) ? "(48b" : "(32b",
- (flags & EXEC_OBJECT_WRITE) ? " write)" : ")",
- (uint64_t)batch->validation_list[i].offset,
- (flags & EXEC_OBJECT_PINNED) ? " (pinned)" : "",
- batch->exec_bos[i]->size);
- }
-}
-
-static struct intel_batch_decode_bo
-decode_get_bo(void *v_brw, bool ppgtt, uint64_t address)
-{
- struct brw_context *brw = v_brw;
- struct brw_batch *batch = &brw->batch;
-
- for (int i = 0; i < batch->exec_count; i++) {
- struct brw_bo *bo = batch->exec_bos[i];
- /* The decoder zeroes out the top 16 bits, so we need to as well */
- uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);
-
- if (address >= bo_address && address < bo_address + bo->size) {
- return (struct intel_batch_decode_bo) {
- .addr = bo_address,
- .size = bo->size,
- .map = brw_bo_map(brw, bo, MAP_READ),
- };
- }
- }
-
- return (struct intel_batch_decode_bo) { };
-}
-
-static unsigned
-decode_get_state_size(void *v_brw, uint64_t address, uint64_t base_address)
-{
- struct brw_context *brw = v_brw;
- struct brw_batch *batch = &brw->batch;
- unsigned size = (uintptr_t)
- _mesa_hash_table_u64_search(batch->state_batch_sizes,
- address - base_address);
- return size;
-}
-
-static void
-init_reloc_list(struct brw_reloc_list *rlist, int count)
-{
- rlist->reloc_count = 0;
- rlist->reloc_array_size = count;
- rlist->relocs = malloc(rlist->reloc_array_size *
- sizeof(struct drm_i915_gem_relocation_entry));
-}
-
-void
-brw_batch_init(struct brw_context *brw)
-{
- struct brw_screen *screen = brw->screen;
- struct brw_batch *batch = &brw->batch;
- const struct intel_device_info *devinfo = &screen->devinfo;
-
- if (INTEL_DEBUG(DEBUG_BATCH)) {
- /* The shadow doesn't get relocs written so state decode fails. */
- batch->use_shadow_copy = false;
- } else
- batch->use_shadow_copy = !devinfo->has_llc;
-
- init_reloc_list(&batch->batch_relocs, 250);
- init_reloc_list(&batch->state_relocs, 250);
-
- batch->batch.map = NULL;
- batch->state.map = NULL;
- batch->exec_count = 0;
- batch->exec_array_size = 100;
- batch->exec_bos =
- malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
- batch->validation_list =
- malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));
- batch->contains_fence_signal = false;
-
- if (INTEL_DEBUG(DEBUG_BATCH)) {
- batch->state_batch_sizes =
- _mesa_hash_table_u64_create(NULL);
-
- const unsigned decode_flags =
- INTEL_BATCH_DECODE_FULL |
- (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
- INTEL_BATCH_DECODE_OFFSETS |
- INTEL_BATCH_DECODE_FLOATS;
-
- intel_batch_decode_ctx_init(&batch->decoder, devinfo, stderr,
- decode_flags, NULL, decode_get_bo,
- decode_get_state_size, brw);
- batch->decoder.max_vbo_decoded_lines = 100;
- }
-
- batch->use_batch_first =
- screen->kernel_features & KERNEL_ALLOWS_EXEC_BATCH_FIRST;
-
- /* PIPE_CONTROL needs a w/a but only on gfx6 */
- batch->valid_reloc_flags = EXEC_OBJECT_WRITE;
- if (devinfo->ver == 6)
- batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT;
-
- brw_batch_reset(brw);
-}
-
-#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
-
-static unsigned
-add_exec_bo(struct brw_batch *batch, struct brw_bo *bo)
-{
- assert(bo->bufmgr == batch->batch.bo->bufmgr);
-
- unsigned index = READ_ONCE(bo->index);
-
- if (index < batch->exec_count && batch->exec_bos[index] == bo)
- return index;
-
- /* May have been shared between multiple active batches */
- for (index = 0; index < batch->exec_count; index++) {
- if (batch->exec_bos[index] == bo)
- return index;
- }
-
- brw_bo_reference(bo);
-
- if (batch->exec_count == batch->exec_array_size) {
- batch->exec_array_size *= 2;
- batch->exec_bos =
- realloc(batch->exec_bos,
- batch->exec_array_size * sizeof(batch->exec_bos[0]));
- batch->validation_list =
- realloc(batch->validation_list,
- batch->exec_array_size * sizeof(batch->validation_list[0]));
- }
-
- batch->validation_list[batch->exec_count] =
- (struct drm_i915_gem_exec_object2) {
- .handle = bo->gem_handle,
- .offset = bo->gtt_offset,
- .flags = bo->kflags,
- };
-
- bo->index = batch->exec_count;
- batch->exec_bos[batch->exec_count] = bo;
- batch->aperture_space += bo->size;
-
- return batch->exec_count++;
-}
-
-static void
-recreate_growing_buffer(struct brw_context *brw,
- struct brw_growing_bo *grow,
- const char *name, unsigned size,
- enum brw_memory_zone memzone)
-{
- struct brw_screen *screen = brw->screen;
- struct brw_batch *batch = &brw->batch;
- struct brw_bufmgr *bufmgr = screen->bufmgr;
-
- /* We can't grow buffers when using softpin, so just overallocate them. */
- if (brw_using_softpin(bufmgr))
- size *= 2;
-
- grow->bo = brw_bo_alloc(bufmgr, name, size, memzone);
- grow->bo->kflags |= can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0;
- grow->partial_bo = NULL;
- grow->partial_bo_map = NULL;
- grow->partial_bytes = 0;
- grow->memzone = memzone;
-
- if (batch->use_shadow_copy)
- grow->map = realloc(grow->map, grow->bo->size);
- else
- grow->map = brw_bo_map(brw, grow->bo, MAP_READ | MAP_WRITE);
-}
-
-static void
-brw_batch_reset(struct brw_context *brw)
-{
- struct brw_batch *batch = &brw->batch;
-
- if (batch->last_bo != NULL) {
- brw_bo_unreference(batch->last_bo);
- batch->last_bo = NULL;
- }
- batch->last_bo = batch->batch.bo;
-
- recreate_growing_buffer(brw, &batch->batch, "batchbuffer", BATCH_SZ,
- BRW_MEMZONE_OTHER);
- batch->map_next = batch->batch.map;
-
- recreate_growing_buffer(brw, &batch->state, "statebuffer", STATE_SZ,
- BRW_MEMZONE_DYNAMIC);
-
- /* Avoid making 0 a valid state offset - otherwise the decoder will try
- * and decode data when we use offset 0 as a null pointer.
- */
- batch->state_used = 1;
-
- add_exec_bo(batch, batch->batch.bo);
- assert(batch->batch.bo->index == 0);
-
- batch->needs_sol_reset = false;
- batch->state_base_address_emitted = false;
-
- if (batch->state_batch_sizes)
- _mesa_hash_table_u64_clear(batch->state_batch_sizes);
-
- /* Always add workaround_bo which contains a driver identifier to be
- * recorded in error states.
- */
- struct brw_bo *identifier_bo = brw->workaround_bo;
- if (identifier_bo)
- add_exec_bo(batch, identifier_bo);
-
- if (batch->contains_fence_signal)
- batch->contains_fence_signal = false;
-}
-
-static void
-brw_batch_reset_and_clear_render_cache(struct brw_context *brw)
-{
- brw_batch_reset(brw);
- brw_cache_sets_clear(brw);
-}
-
-void
-brw_batch_save_state(struct brw_context *brw)
-{
- brw->batch.saved.map_next = brw->batch.map_next;
- brw->batch.saved.batch_reloc_count = brw->batch.batch_relocs.reloc_count;
- brw->batch.saved.state_reloc_count = brw->batch.state_relocs.reloc_count;
- brw->batch.saved.exec_count = brw->batch.exec_count;
-}
-
-bool
-brw_batch_saved_state_is_empty(struct brw_context *brw)
-{
- struct brw_batch *batch = &brw->batch;
- return (batch->saved.map_next == batch->batch.map);
-}
-
-void
-brw_batch_reset_to_saved(struct brw_context *brw)
-{
- for (int i = brw->batch.saved.exec_count;
- i < brw->batch.exec_count; i++) {
- brw_bo_unreference(brw->batch.exec_bos[i]);
- }
- brw->batch.batch_relocs.reloc_count = brw->batch.saved.batch_reloc_count;
- brw->batch.state_relocs.reloc_count = brw->batch.saved.state_reloc_count;
- brw->batch.exec_count = brw->batch.saved.exec_count;
-
- brw->batch.map_next = brw->batch.saved.map_next;
- if (USED_BATCH(brw->batch) == 0)
- brw_new_batch(brw);
-}
-
-void
-brw_batch_free(struct brw_batch *batch)
-{
- if (batch->use_shadow_copy) {
- free(batch->batch.map);
- free(batch->state.map);
- }
-
- for (int i = 0; i < batch->exec_count; i++) {
- brw_bo_unreference(batch->exec_bos[i]);
- }
- free(batch->batch_relocs.relocs);
- free(batch->state_relocs.relocs);
- free(batch->exec_bos);
- free(batch->validation_list);
-
- brw_bo_unreference(batch->last_bo);
- brw_bo_unreference(batch->batch.bo);
- brw_bo_unreference(batch->state.bo);
- if (batch->state_batch_sizes) {
- _mesa_hash_table_u64_destroy(batch->state_batch_sizes);
- intel_batch_decode_ctx_finish(&batch->decoder);
- }
-}
-
-/**
- * Finish copying the old batch/state buffer's contents to the new one
- * after we tried to "grow" the buffer in an earlier operation.
- */
-static void
-finish_growing_bos(struct brw_growing_bo *grow)
-{
- struct brw_bo *old_bo = grow->partial_bo;
- if (!old_bo)
- return;
-
- memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes);
-
- grow->partial_bo = NULL;
- grow->partial_bo_map = NULL;
- grow->partial_bytes = 0;
-
- brw_bo_unreference(old_bo);
-}
-
-static void
-replace_bo_in_reloc_list(struct brw_reloc_list *rlist,
- uint32_t old_handle, uint32_t new_handle)
-{
- for (int i = 0; i < rlist->reloc_count; i++) {
- if (rlist->relocs[i].target_handle == old_handle)
- rlist->relocs[i].target_handle = new_handle;
- }
-}
-
-/**
- * Grow either the batch or state buffer to a new larger size.
- *
- * We can't actually grow buffers, so we allocate a new one, copy over
- * the existing contents, and update our lists to refer to the new one.
- *
- * Note that this is only temporary - each new batch recreates the buffers
- * at their original target size (BATCH_SZ or STATE_SZ).
- */
-static void
-grow_buffer(struct brw_context *brw,
- struct brw_growing_bo *grow,
- unsigned existing_bytes,
- unsigned new_size)
-{
- struct brw_batch *batch = &brw->batch;
- struct brw_bufmgr *bufmgr = brw->bufmgr;
- struct brw_bo *bo = grow->bo;
-
- /* We can't grow buffers that are softpinned, as the growing mechanism
- * involves putting a larger buffer at the same gtt_offset...and we've
- * only allocated the smaller amount of VMA. Without relocations, this
- * simply won't work. This should never happen, however.
- */
- assert(!(bo->kflags & EXEC_OBJECT_PINNED));
-
- perf_debug("Growing %s - ran out of space\n", bo->name);
-
- if (grow->partial_bo) {
- /* We've already grown once, and now we need to do it again.
- * Finish our last grow operation so we can start a new one.
- * This should basically never happen.
- */
- perf_debug("Had to grow multiple times");
- finish_growing_bos(grow);
- }
-
- struct brw_bo *new_bo =
- brw_bo_alloc(bufmgr, bo->name, new_size, grow->memzone);
-
- /* Copy existing data to the new larger buffer */
- grow->partial_bo_map = grow->map;
-
- if (batch->use_shadow_copy) {
- /* We can't safely use realloc, as it may move the existing buffer,
- * breaking existing pointers the caller may still be using. Just
- * malloc a new copy and memcpy it like the normal BO path.
- *
- * Use bo->size rather than new_size because the bufmgr may have
- * rounded up the size, and we want the shadow size to match.
- */
- grow->map = malloc(new_bo->size);
- } else {
- grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
- }
-
- /* Try to put the new BO at the same GTT offset as the old BO (which
- * we're throwing away, so it doesn't need to be there).
- *
- * This guarantees that our relocations continue to work: values we've
- * already written into the buffer, values we're going to write into the
- * buffer, and the validation/relocation lists all will match.
- *
- * Also preserve kflags for EXEC_OBJECT_CAPTURE.
- */
- new_bo->gtt_offset = bo->gtt_offset;
- new_bo->index = bo->index;
- new_bo->kflags = bo->kflags;
-
- /* Batch/state buffers are per-context, and if we've run out of space,
- * we must have actually used them before, so...they will be in the list.
- */
- assert(bo->index < batch->exec_count);
- assert(batch->exec_bos[bo->index] == bo);
-
- /* Update the validation list to use the new BO. */
- batch->validation_list[bo->index].handle = new_bo->gem_handle;
-
- if (!batch->use_batch_first) {
- /* We're not using I915_EXEC_HANDLE_LUT, which means we need to go
- * update the relocation list entries to point at the new BO as well.
- * (With newer kernels, the "handle" is an offset into the validation
- * list, which remains unchanged, so we can skip this.)
- */
- replace_bo_in_reloc_list(&batch->batch_relocs,
- bo->gem_handle, new_bo->gem_handle);
- replace_bo_in_reloc_list(&batch->state_relocs,
- bo->gem_handle, new_bo->gem_handle);
- }
-
- /* Exchange the two BOs...without breaking pointers to the old BO.
- *
- * Consider this scenario:
- *
- * 1. Somebody calls brw_state_batch() to get a region of memory, and
- *    then creates a brw_address pointing to brw->batch.state.bo.
- * 2. They then call brw_state_batch() a second time, which happens to
- * grow and replace the state buffer. They then try to emit a
- * relocation to their first section of memory.
- *
- * If we replace the brw->batch.state.bo pointer at step 2, we would
- * break the address created in step 1. They'd have a pointer to the
- * old destroyed BO. Emitting a relocation would add this dead BO to
- * the validation list...causing /both/ statebuffers to be in the list,
- * and all kinds of disasters.
- *
- * This is not a contrived case - BLORP vertex data upload hits this.
- *
- * There are worse scenarios too. Fences for GL sync objects reference
- * brw->batch.batch.bo. If we replaced the batch pointer when growing,
- * we'd need to chase down every fence and update it to point to the
- * new BO. Otherwise, it would refer to a "batch" that never actually
- * gets submitted, and would fail to trigger.
- *
- * To work around both of these issues, we transmutate the buffers in
- * place, making the existing struct brw_bo represent the new buffer,
- * and "new_bo" represent the old BO. This is highly unusual, but it
- * seems like a necessary evil.
- *
- * We also defer the memcpy of the existing batch's contents. Callers
- * may make multiple brw_state_batch calls, and retain pointers to the
- * old BO's map.  We'll perform the memcpy in finish_growing_bos() when
- * we finally submit the batch, at which point we've finished uploading
- * state, and nobody should have any old references anymore.
- *
- * To do that, we keep a reference to the old BO in grow->partial_bo,
- * and store the number of bytes to copy in grow->partial_bytes. We
- * can monkey with the refcounts directly without atomics because these
- * are per-context BOs and they can only be touched by this thread.
- */
- assert(new_bo->refcount == 1);
- new_bo->refcount = bo->refcount;
- bo->refcount = 1;
-
- assert(list_is_empty(&bo->exports));
- assert(list_is_empty(&new_bo->exports));
-
- struct brw_bo tmp;
- memcpy(&tmp, bo, sizeof(struct brw_bo));
- memcpy(bo, new_bo, sizeof(struct brw_bo));
- memcpy(new_bo, &tmp, sizeof(struct brw_bo));
-
- list_inithead(&bo->exports);
- list_inithead(&new_bo->exports);
-
- grow->partial_bo = new_bo; /* the one reference of the OLD bo */
- grow->partial_bytes = existing_bytes;
-}
-
-void
-brw_batch_require_space(struct brw_context *brw, GLuint sz)
-{
- struct brw_batch *batch = &brw->batch;
-
- const unsigned batch_used = USED_BATCH(*batch) * 4;
- if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) {
- brw_batch_flush(brw);
- } else if (batch_used + sz >= batch->batch.bo->size) {
- const unsigned new_size =
- MIN2(batch->batch.bo->size + batch->batch.bo->size / 2,
- MAX_BATCH_SIZE);
- grow_buffer(brw, &batch->batch, batch_used, new_size);
- batch->map_next = (void *) batch->batch.map + batch_used;
- assert(batch_used + sz < batch->batch.bo->size);
- }
-}
-
-/**
- * Called when starting a new batch buffer.
- */
-static void
-brw_new_batch(struct brw_context *brw)
-{
- /* Unreference any BOs held by the previous batch, and reset counts. */
- for (int i = 0; i < brw->batch.exec_count; i++) {
- brw_bo_unreference(brw->batch.exec_bos[i]);
- brw->batch.exec_bos[i] = NULL;
- }
- brw->batch.batch_relocs.reloc_count = 0;
- brw->batch.state_relocs.reloc_count = 0;
- brw->batch.exec_count = 0;
- brw->batch.aperture_space = 0;
-
- brw_bo_unreference(brw->batch.state.bo);
-
- /* Create a new batchbuffer and reset the associated state: */
- brw_batch_reset_and_clear_render_cache(brw);
-
- /* If the kernel supports hardware contexts, then most hardware state is
- * preserved between batches; we only need to re-emit state that is required
- * to be in every batch. Otherwise we need to re-emit all the state that
- * would otherwise be stored in the context (which for all intents and
- * purposes means everything).
- */
- if (brw->hw_ctx == 0) {
- brw->ctx.NewDriverState |= BRW_NEW_CONTEXT;
- brw_upload_invariant_state(brw);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_BATCH;
-
- brw->ib.index_size = -1;
-
- /* We need to periodically reap the shader time results, because rollover
- * happens every few seconds. We also want to see results every once in a
- * while, because many programs won't cleanly destroy our context, so the
- * end-of-run printout may not happen.
- */
- if (INTEL_DEBUG(DEBUG_SHADER_TIME))
- brw_collect_and_report_shader_time(brw);
-
- brw_batch_maybe_noop(brw);
-}
-
-/**
- * Called from brw_batch_flush before emitting MI_BATCH_BUFFER_END and
- * sending it off.
- *
- * This function can emit state (say, to preserve registers that aren't saved
- * between batches).
- */
-static void
-brw_finish_batch(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- brw->batch.no_wrap = true;
-
- /* Capture the closing pipeline statistics register values necessary to
- * support query objects (in the non-hardware context world).
- */
- brw_emit_query_end(brw);
-
- /* Work around L3 state leaking into contexts that set MI_RESTORE_INHIBIT and
- * assume that the L3 cache is configured according to the hardware
- * defaults.  On Kernel 4.16+, we no longer need to do this.
- */
- if (devinfo->ver >= 7 &&
- !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION))
- gfx7_restore_default_l3_config(brw);
-
- if (devinfo->platform == INTEL_PLATFORM_HSW) {
- /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
- * 3DSTATE_CC_STATE_POINTERS > "Note":
- *
- * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
- * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
- *
- * From the example in the docs, it seems to expect a regular pipe control
- * flush here as well. We may have done it already, but meh.
- *
- * See also WaAvoidRCZCounterRollover.
- */
- brw_emit_mi_flush(brw);
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
- OUT_BATCH(brw->cc.state_offset | 1);
- ADVANCE_BATCH();
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_CS_STALL);
- }
-
- /* Do not restore push constant packets during context restore. */
- if (devinfo->ver >= 7)
- gfx7_emit_isp_disable(brw);
-
- /* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2
- * requires our batch size to be QWord aligned, so we pad it out if
- * necessary by emitting an extra MI_NOOP after the end.
- */
- brw_batch_require_space(brw, 8);
- *brw->batch.map_next++ = MI_BATCH_BUFFER_END;
- if (USED_BATCH(brw->batch) & 1) {
- *brw->batch.map_next++ = MI_NOOP;
- }
-
- brw->batch.no_wrap = false;
-}
-
-static void
-throttle(struct brw_context *brw)
-{
- /* Wait for the swapbuffers before the one we just emitted, so we
- * don't get too many swaps outstanding for apps that are GPU-heavy
- * but not CPU-heavy.
- *
- * We're using intelDRI2Flush (called from the loader before
- * swapbuffer) and glFlush (for front buffer rendering) as the
- * indicator that a frame is done and then throttle when we get
- * here as we prepare to render the next frame. At this point the
- * round trips for swap/copy and getting new buffers are done, and
- * we'll spend less time waiting on the GPU.
- *
- * Unfortunately, we don't have a handle to the batch containing
- * the swap, and getting our hands on that doesn't seem worth it,
- * so we just use the first batch we emitted after the last swap.
- */
- if (brw->need_swap_throttle && brw->throttle_batch[0]) {
- if (brw->throttle_batch[1]) {
- if (!brw->disable_throttling) {
- brw_bo_wait_rendering(brw->throttle_batch[1]);
- }
- brw_bo_unreference(brw->throttle_batch[1]);
- }
- brw->throttle_batch[1] = brw->throttle_batch[0];
- brw->throttle_batch[0] = NULL;
- brw->need_swap_throttle = false;
- /* Throttling here is more precise than the throttle ioctl, so skip it */
- brw->need_flush_throttle = false;
- }
-
- if (brw->need_flush_throttle) {
- drmCommandNone(brw->screen->fd, DRM_I915_GEM_THROTTLE);
- brw->need_flush_throttle = false;
- }
-}
-
-static int
-execbuffer(int fd,
- struct brw_batch *batch,
- uint32_t ctx_id,
- int used,
- int in_fence,
- int *out_fence,
- int flags)
-{
- struct drm_i915_gem_execbuffer2 execbuf = {
- .buffers_ptr = (uintptr_t) batch->validation_list,
- .buffer_count = batch->exec_count,
- .batch_start_offset = 0,
- .batch_len = used,
- .flags = flags,
- .rsvd1 = ctx_id, /* rsvd1 is actually the context ID */
- };
-
- unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
-
- if (in_fence != -1) {
- execbuf.rsvd2 = in_fence;
- execbuf.flags |= I915_EXEC_FENCE_IN;
- }
-
- if (out_fence != NULL) {
- cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
- *out_fence = -1;
- execbuf.flags |= I915_EXEC_FENCE_OUT;
- }
-
- if (num_fences(batch)) {
- execbuf.flags |= I915_EXEC_FENCE_ARRAY;
- execbuf.num_cliprects = num_fences(batch);
- execbuf.cliprects_ptr =
- (uintptr_t)util_dynarray_begin(&batch->exec_fences);
- }
-
-
- int ret = drmIoctl(fd, cmd, &execbuf);
- if (ret != 0)
- ret = -errno;
-
- for (int i = 0; i < batch->exec_count; i++) {
- struct brw_bo *bo = batch->exec_bos[i];
-
- bo->idle = false;
- bo->index = -1;
-
- /* Update brw_bo::gtt_offset */
- if (batch->validation_list[i].offset != bo->gtt_offset) {
- DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
- bo->gem_handle, bo->gtt_offset,
- (uint64_t)batch->validation_list[i].offset);
- assert(!(bo->kflags & EXEC_OBJECT_PINNED));
- bo->gtt_offset = batch->validation_list[i].offset;
- }
- }
-
- if (ret == 0 && out_fence != NULL)
- *out_fence = execbuf.rsvd2 >> 32;
-
- return ret;
-}
-
-static int
-submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
-{
- struct brw_batch *batch = &brw->batch;
- int ret = 0;
-
- if (batch->use_shadow_copy) {
- void *bo_map = brw_bo_map(brw, batch->batch.bo, MAP_WRITE);
- memcpy(bo_map, batch->batch.map, 4 * USED_BATCH(*batch));
-
- bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE);
- memcpy(bo_map, batch->state.map, batch->state_used);
- }
-
- brw_bo_unmap(batch->batch.bo);
- brw_bo_unmap(batch->state.bo);
-
- if (!brw->screen->devinfo.no_hw) {
- /* The requirement for using I915_EXEC_NO_RELOC are:
- *
- * The addresses written in the objects must match the corresponding
- * reloc.gtt_offset which in turn must match the corresponding
- * execobject.offset.
- *
- * Any render targets written to in the batch must be flagged with
- * EXEC_OBJECT_WRITE.
- *
- * To avoid stalling, execobject.offset should match the current
- * address of that object within the active context.
- */
- int flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
-
- if (batch->needs_sol_reset)
- flags |= I915_EXEC_GEN7_SOL_RESET;
-
- /* Set statebuffer relocations */
- const unsigned state_index = batch->state.bo->index;
- if (state_index < batch->exec_count &&
- batch->exec_bos[state_index] == batch->state.bo) {
- struct drm_i915_gem_exec_object2 *entry =
- &batch->validation_list[state_index];
- assert(entry->handle == batch->state.bo->gem_handle);
- entry->relocation_count = batch->state_relocs.reloc_count;
- entry->relocs_ptr = (uintptr_t) batch->state_relocs.relocs;
- }
-
- /* Set batchbuffer relocations */
- struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
- assert(entry->handle == batch->batch.bo->gem_handle);
- entry->relocation_count = batch->batch_relocs.reloc_count;
- entry->relocs_ptr = (uintptr_t) batch->batch_relocs.relocs;
-
- if (batch->use_batch_first) {
- flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
- } else {
- /* Move the batch to the end of the validation list */
- struct drm_i915_gem_exec_object2 tmp;
- struct brw_bo *tmp_bo;
- const unsigned index = batch->exec_count - 1;
-
- tmp = *entry;
- *entry = batch->validation_list[index];
- batch->validation_list[index] = tmp;
-
- tmp_bo = batch->exec_bos[0];
- batch->exec_bos[0] = batch->exec_bos[index];
- batch->exec_bos[index] = tmp_bo;
- }
-
- ret = execbuffer(brw->screen->fd, batch, brw->hw_ctx,
- 4 * USED_BATCH(*batch),
- in_fence_fd, out_fence_fd, flags);
-
- throttle(brw);
- }
-
- if (INTEL_DEBUG(DEBUG_BATCH)) {
- intel_print_batch(&batch->decoder, batch->batch.map,
- 4 * USED_BATCH(*batch),
- batch->batch.bo->gtt_offset, false);
- }
-
- if (brw->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
- brw_check_for_reset(brw);
-
- if (ret != 0) {
- fprintf(stderr, "i965: Failed to submit batchbuffer: %s\n",
- strerror(-ret));
- abort();
- }
-
- return ret;
-}
-
-/**
- * The in_fence_fd is ignored if -1. Otherwise this function takes ownership
- * of the fd.
- *
- * The out_fence_fd is ignored if NULL. Otherwise, the caller takes ownership
- * of the returned fd.
- */
-int
-_brw_batch_flush_fence(struct brw_context *brw,
- int in_fence_fd, int *out_fence_fd,
- const char *file, int line)
-{
- int ret;
-
- if (USED_BATCH(brw->batch) == 0 && !brw->batch.contains_fence_signal)
- return 0;
-
- /* Check that we didn't just wrap our batchbuffer at a bad time. */
- assert(!brw->batch.no_wrap);
-
- brw_finish_batch(brw);
- brw_upload_finish(&brw->upload);
-
- finish_growing_bos(&brw->batch.batch);
- finish_growing_bos(&brw->batch.state);
-
- if (brw->throttle_batch[0] == NULL) {
- brw->throttle_batch[0] = brw->batch.batch.bo;
- brw_bo_reference(brw->throttle_batch[0]);
- }
-
- if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) {
- int bytes_for_commands = 4 * USED_BATCH(brw->batch);
- int bytes_for_state = brw->batch.state_used;
- fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
- " %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture),"
- " %4d batch relocs, %4d state relocs\n", file, line,
- bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
- bytes_for_state, 100.0f * bytes_for_state / STATE_SZ,
- brw->batch.exec_count,
- (float) (brw->batch.aperture_space / (1024 * 1024)),
- brw->batch.batch_relocs.reloc_count,
- brw->batch.state_relocs.reloc_count);
-
- dump_validation_list(&brw->batch);
- }
-
- ret = submit_batch(brw, in_fence_fd, out_fence_fd);
-
- if (INTEL_DEBUG(DEBUG_SYNC)) {
- fprintf(stderr, "waiting for idle\n");
- brw_bo_wait_rendering(brw->batch.batch.bo);
- }
-
- /* Start a new batch buffer. */
- brw_new_batch(brw);
-
- return ret;
-}
-
-void
-brw_batch_maybe_noop(struct brw_context *brw)
-{
- if (!brw->frontend_noop || USED_BATCH(brw->batch) != 0)
- return;
-
- BEGIN_BATCH(1);
- OUT_BATCH(MI_BATCH_BUFFER_END);
- ADVANCE_BATCH();
-}
-
-bool
-brw_batch_references(struct brw_batch *batch, struct brw_bo *bo)
-{
- unsigned index = READ_ONCE(bo->index);
- if (index < batch->exec_count && batch->exec_bos[index] == bo)
- return true;
-
- for (int i = 0; i < batch->exec_count; i++) {
- if (batch->exec_bos[i] == bo)
- return true;
- }
- return false;
-}
-
-/* This is the only way buffers get added to the validate list.
- */
-static uint64_t
-emit_reloc(struct brw_batch *batch,
- struct brw_reloc_list *rlist, uint32_t offset,
- struct brw_bo *target, int32_t target_offset,
- unsigned int reloc_flags)
-{
- assert(target != NULL);
-
- if (target->kflags & EXEC_OBJECT_PINNED) {
- brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
- return intel_canonical_address(target->gtt_offset + target_offset);
- }
-
- unsigned int index = add_exec_bo(batch, target);
- struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
-
- if (rlist->reloc_count == rlist->reloc_array_size) {
- rlist->reloc_array_size *= 2;
- rlist->relocs = realloc(rlist->relocs,
- rlist->reloc_array_size *
- sizeof(struct drm_i915_gem_relocation_entry));
- }
-
- if (reloc_flags & RELOC_32BIT) {
- /* Restrict this buffer to the low 32 bits of the address space.
- *
- * Altering the validation list flags restricts it for this batch,
- * but we also alter the BO's kflags to restrict it permanently
- * (until the BO is destroyed and put back in the cache). Buffers
- * may stay bound across batches, and we want to keep it constrained.
- */
- target->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
- entry->flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
- /* RELOC_32BIT is not an EXEC_OBJECT_* flag, so get rid of it. */
- reloc_flags &= ~RELOC_32BIT;
- }
-
- if (reloc_flags)
- entry->flags |= reloc_flags & batch->valid_reloc_flags;
-
- rlist->relocs[rlist->reloc_count++] =
- (struct drm_i915_gem_relocation_entry) {
- .offset = offset,
- .delta = target_offset,
- .target_handle = batch->use_batch_first ? index : target->gem_handle,
- .presumed_offset = entry->offset,
- };
-
- /* Using the old buffer offset, write in what the right data would be, in
- * case the buffer doesn't move and we can short-circuit the relocation
- * processing in the kernel
- */
- return entry->offset + target_offset;
-}
-
-void
-brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo,
- unsigned writable_flag)
-{
- assert(bo->kflags & EXEC_OBJECT_PINNED);
- assert((writable_flag & ~EXEC_OBJECT_WRITE) == 0);
-
- unsigned int index = add_exec_bo(batch, bo);
- struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
- assert(entry->offset == bo->gtt_offset);
-
- if (writable_flag)
- entry->flags |= EXEC_OBJECT_WRITE;
-}
-
-uint64_t
-brw_batch_reloc(struct brw_batch *batch, uint32_t batch_offset,
- struct brw_bo *target, uint32_t target_offset,
- unsigned int reloc_flags)
-{
- assert(batch_offset <= batch->batch.bo->size - sizeof(uint32_t));
-
- return emit_reloc(batch, &batch->batch_relocs, batch_offset,
- target, target_offset, reloc_flags);
-}
-
-uint64_t
-brw_state_reloc(struct brw_batch *batch, uint32_t state_offset,
- struct brw_bo *target, uint32_t target_offset,
- unsigned int reloc_flags)
-{
- assert(state_offset <= batch->state.bo->size - sizeof(uint32_t));
-
- return emit_reloc(batch, &batch->state_relocs, state_offset,
- target, target_offset, reloc_flags);
-}
-
-/**
- * Reserve some space in the statebuffer, or flush.
- *
- * This is used to estimate when we're near the end of the batch,
- * so we can flush early.
- */
-void
-brw_require_statebuffer_space(struct brw_context *brw, int size)
-{
- if (brw->batch.state_used + size >= STATE_SZ)
- brw_batch_flush(brw);
-}
-
-/**
- * Allocates a block of space in the batchbuffer for indirect state.
- */
-void *
-brw_state_batch(struct brw_context *brw,
- int size,
- int alignment,
- uint32_t *out_offset)
-{
- struct brw_batch *batch = &brw->batch;
-
- assert(size < batch->state.bo->size);
-
- uint32_t offset = ALIGN(batch->state_used, alignment);
-
- if (offset + size >= STATE_SZ && !batch->no_wrap) {
- brw_batch_flush(brw);
- offset = ALIGN(batch->state_used, alignment);
- } else if (offset + size >= batch->state.bo->size) {
- const unsigned new_size =
- MIN2(batch->state.bo->size + batch->state.bo->size / 2,
- MAX_STATE_SIZE);
- grow_buffer(brw, &batch->state, batch->state_used, new_size);
- assert(offset + size < batch->state.bo->size);
- }
-
- if (INTEL_DEBUG(DEBUG_BATCH)) {
- _mesa_hash_table_u64_insert(batch->state_batch_sizes,
- offset, (void *) (uintptr_t) size);
- }
-
- batch->state_used = offset + size;
-
- *out_offset = offset;
- return batch->state.map + (offset >> 2);
-}
-
-void
-brw_batch_data(struct brw_context *brw,
- const void *data, GLuint bytes)
-{
- assert((bytes & 3) == 0);
- brw_batch_require_space(brw, bytes);
- memcpy(brw->batch.map_next, data, bytes);
- brw->batch.map_next += bytes >> 2;
-}
-
-static void
-load_sized_register_mem(struct brw_context *brw,
- uint32_t reg,
- struct brw_bo *bo,
- uint32_t offset,
- int size)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- int i;
-
- /* MI_LOAD_REGISTER_MEM only exists on Gfx7+. */
- assert(devinfo->ver >= 7);
-
- if (devinfo->ver >= 8) {
- BEGIN_BATCH(4 * size);
- for (i = 0; i < size; i++) {
- OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (4 - 2));
- OUT_BATCH(reg + i * 4);
- OUT_RELOC64(bo, 0, offset + i * 4);
- }
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(3 * size);
- for (i = 0; i < size; i++) {
- OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(reg + i * 4);
- OUT_RELOC(bo, 0, offset + i * 4);
- }
- ADVANCE_BATCH();
- }
-}
-
-void
-brw_load_register_mem(struct brw_context *brw,
- uint32_t reg,
- struct brw_bo *bo,
- uint32_t offset)
-{
- load_sized_register_mem(brw, reg, bo, offset, 1);
-}
-
-void
-brw_load_register_mem64(struct brw_context *brw,
- uint32_t reg,
- struct brw_bo *bo,
- uint32_t offset)
-{
- load_sized_register_mem(brw, reg, bo, offset, 2);
-}
-
-/*
- * Write an arbitrary 32-bit register to a buffer via MI_STORE_REGISTER_MEM.
- */
-void
-brw_store_register_mem32(struct brw_context *brw,
- struct brw_bo *bo, uint32_t reg, uint32_t offset)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 6);
-
- if (devinfo->ver >= 8) {
- BEGIN_BATCH(4);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
- OUT_BATCH(reg);
- OUT_RELOC64(bo, RELOC_WRITE, offset);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(reg);
- OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset);
- ADVANCE_BATCH();
- }
-}
-
-/*
- * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
- */
-void
-brw_store_register_mem64(struct brw_context *brw,
- struct brw_bo *bo, uint32_t reg, uint32_t offset)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 6);
-
- /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
- * read a full 64-bit register, we need to do two of them.
- */
- if (devinfo->ver >= 8) {
- BEGIN_BATCH(8);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
- OUT_BATCH(reg);
- OUT_RELOC64(bo, RELOC_WRITE, offset);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
- OUT_BATCH(reg + sizeof(uint32_t));
- OUT_RELOC64(bo, RELOC_WRITE, offset + sizeof(uint32_t));
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(6);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(reg);
- OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(reg + sizeof(uint32_t));
- OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + sizeof(uint32_t));
- ADVANCE_BATCH();
- }
-}
-
-/*
- * Write a 32-bit register using immediate data.
- */
-void
-brw_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm)
-{
- assert(brw->screen->devinfo.ver >= 6);
-
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(reg);
- OUT_BATCH(imm);
- ADVANCE_BATCH();
-}
-
-/*
- * Write a 64-bit register using immediate data.
- */
-void
-brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm)
-{
- assert(brw->screen->devinfo.ver >= 6);
-
- BEGIN_BATCH(5);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2));
- OUT_BATCH(reg);
- OUT_BATCH(imm & 0xffffffff);
- OUT_BATCH(reg + 4);
- OUT_BATCH(imm >> 32);
- ADVANCE_BATCH();
-}
-
-/*
- * Copies a 32-bit register.
- */
-void
-brw_load_register_reg(struct brw_context *brw, uint32_t dest, uint32_t src)
-{
- assert(brw->screen->devinfo.verx10 >= 75);
-
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
- OUT_BATCH(src);
- OUT_BATCH(dest);
- ADVANCE_BATCH();
-}
-
-/*
- * Copies a 64-bit register.
- */
-void
-brw_load_register_reg64(struct brw_context *brw, uint32_t dest, uint32_t src)
-{
- assert(brw->screen->devinfo.verx10 >= 75);
-
- BEGIN_BATCH(6);
- OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
- OUT_BATCH(src);
- OUT_BATCH(dest);
- OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
- OUT_BATCH(src + sizeof(uint32_t));
- OUT_BATCH(dest + sizeof(uint32_t));
- ADVANCE_BATCH();
-}
-
-/*
- * Write 32-bits of immediate data to a GPU memory buffer.
- */
-void
-brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo,
- uint32_t offset, uint32_t imm)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 6);
-
- BEGIN_BATCH(4);
- OUT_BATCH(MI_STORE_DATA_IMM | (4 - 2));
- if (devinfo->ver >= 8)
- OUT_RELOC64(bo, RELOC_WRITE, offset);
- else {
- OUT_BATCH(0); /* MBZ */
- OUT_RELOC(bo, RELOC_WRITE, offset);
- }
- OUT_BATCH(imm);
- ADVANCE_BATCH();
-}
-
-/*
- * Write 64-bits of immediate data to a GPU memory buffer.
- */
-void
-brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo,
- uint32_t offset, uint64_t imm)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 6);
-
- BEGIN_BATCH(5);
- OUT_BATCH(MI_STORE_DATA_IMM | (5 - 2));
- if (devinfo->ver >= 8)
- OUT_RELOC64(bo, RELOC_WRITE, offset);
- else {
- OUT_BATCH(0); /* MBZ */
- OUT_RELOC(bo, RELOC_WRITE, offset);
- }
- OUT_BATCH(imm & 0xffffffffu);
- OUT_BATCH(imm >> 32);
- ADVANCE_BATCH();
-}
+++ /dev/null
-#ifndef BRW_BATCH_H
-#define BRW_BATCH_H
-
-#include "main/mtypes.h"
-
-#include "brw_context.h"
-#include "brw_bufmgr.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* The kernel assumes batchbuffers are smaller than 256kB. */
-#define MAX_BATCH_SIZE (256 * 1024)
-
-/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
- * Address, which means that we can't put binding tables beyond 64kB. This
- * effectively limits the maximum statebuffer size to 64kB.
- */
-#define MAX_STATE_SIZE (64 * 1024)
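An illustrative compile-time check, assuming a C11 toolchain, tying the limit above to the U16 offset it is derived from:

   _Static_assert(MAX_STATE_SIZE <= (1 << 16),
                  "binding tables must stay addressable by a U16 offset");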
-
-struct brw_batch;
-
-void brw_batch_init(struct brw_context *brw);
-void brw_batch_free(struct brw_batch *batch);
-void brw_batch_save_state(struct brw_context *brw);
-bool brw_batch_saved_state_is_empty(struct brw_context *brw);
-void brw_batch_reset_to_saved(struct brw_context *brw);
-void brw_batch_require_space(struct brw_context *brw, GLuint sz);
-int _brw_batch_flush_fence(struct brw_context *brw,
- int in_fence_fd, int *out_fence_fd,
- const char *file, int line);
-void brw_batch_maybe_noop(struct brw_context *brw);
-
-#define brw_batch_flush(brw) \
- _brw_batch_flush_fence((brw), -1, NULL, __FILE__, __LINE__)
-
-#define brw_batch_flush_fence(brw, in_fence_fd, out_fence_fd) \
- _brw_batch_flush_fence((brw), (in_fence_fd), (out_fence_fd), \
- __FILE__, __LINE__)
-
-/* Unlike bmBufferData, this currently requires the buffer be mapped.
- * Consider it a convenience function wrapping multiple
- * brw_buffer_dword() calls.
- */
-void brw_batch_data(struct brw_context *brw,
- const void *data, GLuint bytes);
-
-static inline bool
-brw_batch_has_aperture_space(struct brw_context *brw, uint64_t extra_space)
-{
- return brw->batch.aperture_space + extra_space <=
- brw->screen->aperture_threshold;
-}
-
-bool brw_batch_references(struct brw_batch *batch, struct brw_bo *bo);
-
-#define RELOC_WRITE EXEC_OBJECT_WRITE
-#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
-/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
-#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
-
-void brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo,
- unsigned writeable_flag);
-
-uint64_t brw_batch_reloc(struct brw_batch *batch,
- uint32_t batch_offset,
- struct brw_bo *target,
- uint32_t target_offset,
- unsigned flags);
-uint64_t brw_state_reloc(struct brw_batch *batch,
- uint32_t batch_offset,
- struct brw_bo *target,
- uint32_t target_offset,
- unsigned flags);
-
-#define USED_BATCH(_batch) \
- ((uintptr_t)((_batch).map_next - (_batch).batch.map))
-
-static inline uint32_t float_as_int(float f)
-{
- union {
- float f;
- uint32_t d;
- } fi;
-
- fi.f = f;
- return fi.d;
-}
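A quick sanity check of the type-punning helper above (illustrative asserts): the IEEE-754 single-precision pattern for 1.0f is 0x3f800000, which is exactly what OUT_BATCH_F(1.0f) ends up emitting.

   assert(float_as_int(1.0f) == 0x3f800000u);
   assert(float_as_int(0.0f) == 0x00000000u);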
-
-static inline void
-brw_batch_begin(struct brw_context *brw, int n)
-{
- brw_batch_require_space(brw, n * 4);
-
-#ifdef DEBUG
- brw->batch.emit = USED_BATCH(brw->batch);
- brw->batch.total = n;
-#endif
-}
-
-static inline void
-brw_batch_advance(struct brw_context *brw)
-{
-#ifdef DEBUG
- struct brw_batch *batch = &brw->batch;
- unsigned int _n = USED_BATCH(*batch) - batch->emit;
- assert(batch->total != 0);
- if (_n != batch->total) {
- fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
- _n, batch->total);
- abort();
- }
- batch->total = 0;
-#else
- (void) brw;
-#endif
-}
-
-static inline bool
-brw_ptr_in_state_buffer(struct brw_batch *batch, void *p)
-{
- return (char *) p >= (char *) batch->state.map &&
- (char *) p < (char *) batch->state.map + batch->state.bo->size;
-}
-
-#define BEGIN_BATCH(n) do { \
- brw_batch_begin(brw, (n)); \
- uint32_t *__map = brw->batch.map_next; \
- brw->batch.map_next += (n)
-
-#define BEGIN_BATCH_BLT(n) do { \
- assert(brw->screen->devinfo.ver < 6); \
- brw_batch_begin(brw, (n)); \
- uint32_t *__map = brw->batch.map_next; \
- brw->batch.map_next += (n)
-
-#define OUT_BATCH(d) *__map++ = (d)
-#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
-
-#define OUT_RELOC(buf, flags, delta) do { \
- uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
- uint32_t reloc = \
- brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
- OUT_BATCH(reloc); \
-} while (0)
-
-/* Handle 48-bit address relocations for Gfx8+ */
-#define OUT_RELOC64(buf, flags, delta) do { \
- uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
- uint64_t reloc64 = \
- brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
- OUT_BATCH(reloc64); \
- OUT_BATCH(reloc64 >> 32); \
-} while (0)
-
-#define ADVANCE_BATCH() \
- assert(__map == brw->batch.map_next); \
- brw_batch_advance(brw); \
-} while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * \file brw_binding_tables.c
- *
- * State atoms which upload the "binding table" for each shader stage.
- *
- * Binding tables map a numeric "surface index" to the SURFACE_STATE structure
- * for a currently bound surface. This allows SEND messages (such as sampler
- * or data port messages) to refer to a particular surface by number, rather
- * than by pointer.
- *
- * The binding table is stored as a (sparse) array of SURFACE_STATE entries;
- * surface indexes are simply indexes into the array. The ordering of the
- * entries is entirely left up to software; see the SURF_INDEX_* macros in
- * brw_context.h to see our current layout.
- */
-
-#include "main/mtypes.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/**
- * Upload a shader stage's binding table as indirect state.
- *
- * This copies brw_stage_state::surf_offset[] into the indirect state section
- * of the batchbuffer (allocated by brw_state_batch()).
- */
-void
-brw_upload_binding_table(struct brw_context *brw,
- uint32_t packet_name,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (prog_data->binding_table.size_bytes == 0) {
- /* There are no surfaces; skip making the binding table altogether. */
- if (stage_state->bind_bo_offset == 0 && devinfo->ver < 9)
- return;
-
- stage_state->bind_bo_offset = 0;
- } else {
- /* Upload a new binding table. */
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- brw_emit_buffer_surface_state(
- brw, &stage_state->surf_offset[
- prog_data->binding_table.shader_time_start],
- brw->shader_time.bo, 0, ISL_FORMAT_RAW,
- brw->shader_time.bo->size, 1, RELOC_WRITE);
- }
- uint32_t *bind =
- brw_state_batch(brw, prog_data->binding_table.size_bytes,
- 32, &stage_state->bind_bo_offset);
-
- /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
- memcpy(bind, stage_state->surf_offset,
- prog_data->binding_table.size_bytes);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-
- if (devinfo->ver >= 7) {
- BEGIN_BATCH(2);
- OUT_BATCH(packet_name << 16 | (2 - 2));
- /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
- * when hw-generated binding table is enabled.
- */
- OUT_BATCH(stage_state->bind_bo_offset);
- ADVANCE_BATCH();
- }
-}
-
-/**
- * State atoms which upload the binding table for a particular shader stage.
- * @{
- */
-
-/** Upload the VS binding table. */
-static void
-brw_vs_upload_binding_table(struct brw_context *brw)
-{
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
- brw_upload_binding_table(brw,
- _3DSTATE_BINDING_TABLE_POINTERS_VS,
- prog_data,
- &brw->vs.base);
-}
-
-const struct brw_tracked_state brw_vs_binding_table = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VS_CONSTBUF |
- BRW_NEW_VS_PROG_DATA |
- BRW_NEW_SURFACES,
- },
- .emit = brw_vs_upload_binding_table,
-};
-
-
-/** Upload the PS binding table. */
-static void
-brw_upload_wm_binding_table(struct brw_context *brw)
-{
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
- brw_upload_binding_table(brw,
- _3DSTATE_BINDING_TABLE_POINTERS_PS,
- prog_data,
- &brw->wm.base);
-}
-
-const struct brw_tracked_state brw_wm_binding_table = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_SURFACES,
- },
- .emit = brw_upload_wm_binding_table,
-};
-
-/** Upload the TCS binding table (if tessellation stages are active). */
-static void
-brw_tcs_upload_binding_table(struct brw_context *brw)
-{
- /* Skip if the tessellation stages are disabled. */
- if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL)
- return;
-
- /* BRW_NEW_TCS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
- brw_upload_binding_table(brw,
- _3DSTATE_BINDING_TABLE_POINTERS_HS,
- prog_data,
- &brw->tcs.base);
-}
-
-const struct brw_tracked_state brw_tcs_binding_table = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_DEFAULT_TESS_LEVELS |
- BRW_NEW_SURFACES |
- BRW_NEW_TCS_CONSTBUF |
- BRW_NEW_TCS_PROG_DATA,
- },
- .emit = brw_tcs_upload_binding_table,
-};
-
-/** Upload the TES binding table (if TES is active). */
-static void
-brw_tes_upload_binding_table(struct brw_context *brw)
-{
- /* If there's no TES, skip changing anything. */
- if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL)
- return;
-
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
- brw_upload_binding_table(brw,
- _3DSTATE_BINDING_TABLE_POINTERS_DS,
- prog_data,
- &brw->tes.base);
-}
-
-const struct brw_tracked_state brw_tes_binding_table = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_SURFACES |
- BRW_NEW_TES_CONSTBUF |
- BRW_NEW_TES_PROG_DATA,
- },
- .emit = brw_tes_upload_binding_table,
-};
-
-/** Upload the GS binding table (if GS is active). */
-static void
-brw_gs_upload_binding_table(struct brw_context *brw)
-{
- /* If there's no GS, skip changing anything. */
- if (brw->programs[MESA_SHADER_GEOMETRY] == NULL)
- return;
-
- /* BRW_NEW_GS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
- brw_upload_binding_table(brw,
- _3DSTATE_BINDING_TABLE_POINTERS_GS,
- prog_data,
- &brw->gs.base);
-}
-
-const struct brw_tracked_state brw_gs_binding_table = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_GS_CONSTBUF |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_SURFACES,
- },
- .emit = brw_gs_upload_binding_table,
-};
-/** @} */
-
-/**
- * State atoms which emit 3DSTATE packets to update the binding table pointers.
- * @{
- */
-
-/**
- * (Gfx4-5) Upload the binding table pointers for all shader stages.
- *
- * The binding table pointers are relative to the surface state base address,
- * which points at the batchbuffer containing the streamed batch state.
- */
-static void
-gfx4_upload_binding_table_pointers(struct brw_context *brw)
-{
- BEGIN_BATCH(6);
- OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
- OUT_BATCH(brw->vs.base.bind_bo_offset);
- OUT_BATCH(0); /* gs */
- OUT_BATCH(0); /* clip */
- OUT_BATCH(0); /* sf */
- OUT_BATCH(brw->wm.base.bind_bo_offset);
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state brw_binding_table_pointers = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_BINDING_TABLE_POINTERS |
- BRW_NEW_STATE_BASE_ADDRESS,
- },
- .emit = gfx4_upload_binding_table_pointers,
-};
-
-/**
- * (Sandybridge Only) Upload the binding table pointers for all shader stages.
- *
- * The binding table pointers are relative to the surface state base address,
- * which points at the batchbuffer containing the streamed batch state.
- */
-static void
-gfx6_upload_binding_table_pointers(struct brw_context *brw)
-{
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
- GFX6_BINDING_TABLE_MODIFY_VS |
- GFX6_BINDING_TABLE_MODIFY_GS |
- GFX6_BINDING_TABLE_MODIFY_PS |
- (4 - 2));
- OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
- if (brw->ff_gs.prog_active)
- OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
- else
- OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
- OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gfx6_binding_table_pointers = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_BINDING_TABLE_POINTERS |
- BRW_NEW_STATE_BASE_ADDRESS,
- },
- .emit = gfx6_upload_binding_table_pointers,
-};
-
-/** @} */
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "main/blit.h"
-#include "main/context.h"
-#include "main/enums.h"
-#include "main/fbobject.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_blit.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_batch.h"
-#include "brw_mipmap_tree.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLIT
-
-static void
-brw_miptree_set_alpha_to_one(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- int x, int y, int width, int height);
-
-static GLuint translate_raster_op(enum gl_logicop_mode logicop)
-{
- return logicop | (logicop << 4);
-}
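A worked example, assuming gl_logicop_mode uses the standard two-operand minterm encoding (COLOR_LOGICOP_COPY = 0xC, COLOR_LOGICOP_NOOP = 0xA): duplicating the nibble produces the familiar 8-bit ROP codes the blitter expects.

   assert(translate_raster_op(COLOR_LOGICOP_COPY) == 0xCC);  /* SRCCOPY */
   assert(translate_raster_op(COLOR_LOGICOP_NOOP) == 0xAA);  /* keep destination */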
-
-static uint32_t
-br13_for_cpp(int cpp)
-{
- switch (cpp) {
- case 16:
- return BR13_32323232;
- case 8:
- return BR13_16161616;
- case 4:
- return BR13_8888;
- case 2:
- return BR13_565;
- case 1:
- return BR13_8;
- default:
- unreachable("not reached");
- }
-}
-
-/**
- * Emits the packet for switching the blitter from X to Y tiled or back.
- *
- * This has to be called in a single BEGIN_BATCH_BLT_TILED() /
- * ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as
- * part of the hardware context (not a register save/restore), and if the batchbuffer was
- * to get flushed between setting and blitting, or blitting and restoring, our
- * tiling state would leak into other unsuspecting applications (like the X
- * server).
- */
-static uint32_t *
-set_blitter_tiling(struct brw_context *brw,
- bool dst_y_tiled, bool src_y_tiled,
- uint32_t *__map)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const unsigned n_dwords = devinfo->ver >= 8 ? 5 : 4;
- assert(devinfo->ver >= 6);
-
- /* Idle the blitter before we update how tiling is interpreted. */
- OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- if (n_dwords == 5)
- OUT_BATCH(0);
-
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(BCS_SWCTRL);
- OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
- (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
- (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
- return __map;
-}
-#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map)
-
-#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \
- unsigned set_tiling_batch_size = 0; \
- if (dst_y_tiled || src_y_tiled) { \
- if (devinfo->ver >= 8) \
- set_tiling_batch_size = 16; \
- else \
- set_tiling_batch_size = 14; \
- } \
- BEGIN_BATCH_BLT(n + set_tiling_batch_size); \
- if (dst_y_tiled || src_y_tiled) \
- SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled)
-
-#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \
- if (dst_y_tiled || src_y_tiled) \
- SET_BLITTER_TILING(brw, false, false); \
- ADVANCE_BATCH()
-
-bool
-brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst)
-{
- /* The BLT doesn't handle sRGB conversion */
- assert(src == _mesa_get_srgb_format_linear(src));
- assert(dst == _mesa_get_srgb_format_linear(dst));
-
- /* No swizzle or format conversions possible, except... */
- if (src == dst)
- return true;
-
- /* ...we can either discard the alpha channel when going from A->X,
- * or we can fill the alpha channel with 0xff when going from X->A
- */
- if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM)
- return (dst == MESA_FORMAT_B8G8R8A8_UNORM ||
- dst == MESA_FORMAT_B8G8R8X8_UNORM);
-
- if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM)
- return (dst == MESA_FORMAT_R8G8B8A8_UNORM ||
- dst == MESA_FORMAT_R8G8B8X8_UNORM);
-
- /* We can also discard alpha when going from A2->X2 for 2 bit alpha,
- * however we can't fill the alpha channel with two 1 bits when going
- * from X2->A2, because brw_miptree_set_alpha_to_one() is not yet
- * ready for this / can only handle 8 bit alpha.
- */
- if (src == MESA_FORMAT_B10G10R10A2_UNORM)
- return (dst == MESA_FORMAT_B10G10R10A2_UNORM ||
- dst == MESA_FORMAT_B10G10R10X2_UNORM);
-
- if (src == MESA_FORMAT_R10G10B10A2_UNORM)
- return (dst == MESA_FORMAT_R10G10B10A2_UNORM ||
- dst == MESA_FORMAT_R10G10B10X2_UNORM);
-
- return false;
-}
-
-static void
-get_blit_intratile_offset_el(const struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t total_x_offset_el,
- uint32_t total_y_offset_el,
- uint64_t *tile_offset_B,
- uint32_t *x_offset_el,
- uint32_t *y_offset_el)
-{
- ASSERTED uint32_t z_offset_el, array_offset;
- isl_tiling_get_intratile_offset_el(mt->surf.tiling, mt->surf.dim,
- mt->surf.msaa_layout,
- mt->cpp * 8, mt->surf.samples,
- mt->surf.row_pitch_B,
- mt->surf.array_pitch_el_rows,
- total_x_offset_el, total_y_offset_el, 0, 0,
- tile_offset_B,
- x_offset_el, y_offset_el,
- &z_offset_el, &array_offset);
- assert(z_offset_el == 0);
- assert(array_offset == 0);
-
- if (mt->surf.tiling == ISL_TILING_LINEAR) {
- /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
- *
- * "Base address of the destination surface: X=0, Y=0. Lower 32bits
- * of the 48bit addressing. When Src Tiling is enabled (Bit_15
- * enabled), this address must be 4KB-aligned. When Tiling is not
- * enabled, this address should be CL (64byte) aligned."
- *
- * The offsets we get from ISL in the tiled case are already aligned.
- * In the linear case, we need to do some of our own aligning.
- */
- uint32_t delta = *tile_offset_B & 63;
- assert(delta % mt->cpp == 0);
- *tile_offset_B -= delta;
- *x_offset_el += delta / mt->cpp;
- } else {
- assert(*tile_offset_B % 4096 == 0);
- }
-}
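As a worked example of the linear fix-up above, with hypothetical numbers: for cpp = 4 and a computed tile_offset_B of 100 bytes, delta = 100 & 63 = 36, so the base is rounded down to the 64-byte boundary (tile_offset_B = 64) and the remainder is folded back into the X coordinate as 36 / 4 = 9 extra pixels.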
-
-static bool
-alignment_valid(struct brw_context *brw, unsigned offset,
- enum isl_tiling tiling)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Tiled buffers must be page-aligned (4K). */
- if (tiling != ISL_TILING_LINEAR)
- return (offset & 4095) == 0;
-
- /* On Gfx8+, linear buffers must be cacheline-aligned. */
- if (devinfo->ver >= 8)
- return (offset & 63) == 0;
-
- return true;
-}
-
-static uint32_t
-xy_blit_cmd(enum isl_tiling src_tiling, enum isl_tiling dst_tiling,
- uint32_t cpp)
-{
- uint32_t CMD = 0;
-
- assert(cpp <= 4);
- switch (cpp) {
- case 1:
- case 2:
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- break;
- default:
- unreachable("not reached");
- }
-
- if (dst_tiling != ISL_TILING_LINEAR)
- CMD |= XY_DST_TILED;
-
- if (src_tiling != ISL_TILING_LINEAR)
- CMD |= XY_SRC_TILED;
-
- return CMD;
-}
-
-/* Copy BitBlt
- */
-static bool
-emit_copy_blit(struct brw_context *brw,
- GLuint cpp,
- int32_t src_pitch,
- struct brw_bo *src_buffer,
- GLuint src_offset,
- enum isl_tiling src_tiling,
- int32_t dst_pitch,
- struct brw_bo *dst_buffer,
- GLuint dst_offset,
- enum isl_tiling dst_tiling,
- GLshort src_x, GLshort src_y,
- GLshort dst_x, GLshort dst_y,
- GLshort w, GLshort h,
- enum gl_logicop_mode logic_op)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- GLuint CMD, BR13;
- int dst_y2 = dst_y + h;
- int dst_x2 = dst_x + w;
- bool dst_y_tiled = dst_tiling == ISL_TILING_Y0;
- bool src_y_tiled = src_tiling == ISL_TILING_Y0;
- uint32_t src_tile_w, src_tile_h;
- uint32_t dst_tile_w, dst_tile_h;
-
- if ((dst_y_tiled || src_y_tiled) && devinfo->ver < 6)
- return false;
-
- const unsigned bo_sizes = dst_buffer->size + src_buffer->size;
-
- /* do space check before going any further */
- if (!brw_batch_has_aperture_space(brw, bo_sizes))
- brw_batch_flush(brw);
-
- if (!brw_batch_has_aperture_space(brw, bo_sizes))
- return false;
-
- unsigned length = devinfo->ver >= 8 ? 10 : 8;
-
- brw_batch_require_space(brw, length * 4);
- DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
- __func__,
- src_buffer, src_pitch, src_offset, src_x, src_y,
- dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
-
- isl_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h);
- isl_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h);
-
- /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
- * (X direction width of the Tile). This is ensured while allocating the
- * buffer object.
- */
- assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
- assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
-
- /* For big formats (such as floating point), do the copy using 16 or
- * 32bpp and multiply the coordinates.
- */
- if (cpp > 4) {
- if (cpp % 4 == 2) {
- dst_x *= cpp / 2;
- dst_x2 *= cpp / 2;
- src_x *= cpp / 2;
- cpp = 2;
- } else {
- assert(cpp % 4 == 0);
- dst_x *= cpp / 4;
- dst_x2 *= cpp / 4;
- src_x *= cpp / 4;
- cpp = 4;
- }
- }
-
- if (!alignment_valid(brw, dst_offset, dst_tiling))
- return false;
- if (!alignment_valid(brw, src_offset, src_tiling))
- return false;
-
- /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
- * the low bits. Offsets must be naturally aligned.
- */
- if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
- dst_pitch % 4 != 0 || dst_offset % cpp != 0)
- return false;
-
- assert(cpp <= 4);
- BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
-
- CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp);
-
- /* For tiled source and destination, pitch value should be specified
- * as a number of Dwords.
- */
- if (dst_tiling != ISL_TILING_LINEAR)
- dst_pitch /= 4;
-
- if (src_tiling != ISL_TILING_LINEAR)
- src_pitch /= 4;
-
- if (dst_y2 <= dst_y || dst_x2 <= dst_x)
- return true;
-
- assert(dst_x < dst_x2);
- assert(dst_y < dst_y2);
-
- BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
- OUT_BATCH(CMD | (length - 2));
- OUT_BATCH(BR13 | (uint16_t)dst_pitch);
- OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X));
- OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X));
- if (devinfo->ver >= 8) {
- OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
- } else {
- OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
- }
- OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X));
- OUT_BATCH((uint16_t)src_pitch);
- if (devinfo->ver >= 8) {
- OUT_RELOC64(src_buffer, 0, src_offset);
- } else {
- OUT_RELOC(src_buffer, 0, src_offset);
- }
-
- ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
-
- brw_emit_mi_flush(brw);
-
- return true;
-}
-
-static bool
-emit_miptree_blit(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- uint32_t src_x, uint32_t src_y,
- struct brw_mipmap_tree *dst_mt,
- uint32_t dst_x, uint32_t dst_y,
- uint32_t width, uint32_t height,
- bool reverse, enum gl_logicop_mode logicop)
-{
- /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
- * Data Size Limitations):
- *
- * The BLT engine is capable of transferring very large quantities of
- * graphics data. Any graphics data read from and written to the
- * destination is permitted to represent a number of pixels that
- * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
- * at the destination. The maximum number of pixels that may be
- * represented per scan line’s worth of graphics data depends on the
- * color depth.
- *
- * The blitter's pitch is a signed 16-bit integer, but measured in bytes
- * for linear surfaces and DWords for tiled surfaces. So the maximum
- * pitch is 32k linear and 128k tiled.
- */
- if (brw_miptree_blt_pitch(src_mt) >= 32768 ||
- brw_miptree_blt_pitch(dst_mt) >= 32768) {
- perf_debug("Falling back due to >= 32k/128k pitch\n");
- return false;
- }
-
- /* We need to split the blit into chunks that each fit within the blitter's
- * restrictions. We can't use a chunk size of 32768 because we need to
- * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
- * a nice round power of two, big enough that performance won't suffer, and
- * small enough to guarantee everything fits.
- */
- const uint32_t max_chunk_size = 16384;
-
- for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
- for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
- const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
- const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
-
- uint64_t src_offset;
- uint32_t src_tile_x, src_tile_y;
- get_blit_intratile_offset_el(brw, src_mt,
- src_x + chunk_x, src_y + chunk_y,
- &src_offset, &src_tile_x, &src_tile_y);
-
- uint64_t dst_offset;
- uint32_t dst_tile_x, dst_tile_y;
- get_blit_intratile_offset_el(brw, dst_mt,
- dst_x + chunk_x, dst_y + chunk_y,
- &dst_offset, &dst_tile_x, &dst_tile_y);
-
- if (!emit_copy_blit(brw,
- src_mt->cpp,
- reverse ? -src_mt->surf.row_pitch_B :
- src_mt->surf.row_pitch_B,
- src_mt->bo, src_mt->offset + src_offset,
- src_mt->surf.tiling,
- dst_mt->surf.row_pitch_B,
- dst_mt->bo, dst_mt->offset + dst_offset,
- dst_mt->surf.tiling,
- src_tile_x, src_tile_y,
- dst_tile_x, dst_tile_y,
- chunk_w, chunk_h,
- logicop)) {
- /* If this is ever going to fail, it will fail on the first chunk */
- assert(chunk_x == 0 && chunk_y == 0);
- return false;
- }
- }
- }
-
- return true;
-}
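For instance, a hypothetical 40000x100 blit would be emitted as three chunks along X, at chunk_x = 0, 16384 and 32768 with widths 16384, 16384 and 7232, each comfortably inside the blitter's coordinate and pitch limits.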
-
-/**
- * Implements a rectangular block transfer (blit) of pixels between two
- * miptrees.
- *
- * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
- * but limited, pitches and sizes allowed.
- *
- * The src/dst coordinates are relative to the given level/slice of the
- * miptree.
- *
- * If @src_flip or @dst_flip is set, then the rectangle within that miptree
- * will be inverted (including scanline order) when copying. This is common
- * in GL when copying between window system and user-created
- * renderbuffers/textures.
- */
-bool
-brw_miptree_blit(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_level, int src_slice,
- uint32_t src_x, uint32_t src_y, bool src_flip,
- struct brw_mipmap_tree *dst_mt,
- int dst_level, int dst_slice,
- uint32_t dst_x, uint32_t dst_y, bool dst_flip,
- uint32_t width, uint32_t height,
- enum gl_logicop_mode logicop)
-{
- /* The blitter doesn't understand multisampling at all. */
- if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1)
- return false;
-
- /* No sRGB decode or encode is done by the hardware blitter, which is
- * consistent with what we want in many callers (glCopyTexSubImage(),
- * texture validation, etc.).
- */
- mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
- mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
-
- /* The blitter doesn't support doing any format conversions. We do also
- * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
- * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
- * channel to 1.0 at the end. Also trivially ARGB2101010 to XRGB2101010,
- * but not XRGB2101010 to ARGB2101010 yet.
- */
- if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) {
- perf_debug("%s: Can't use hardware blitter from %s to %s, "
- "falling back.\n", __func__,
- _mesa_get_format_name(src_format),
- _mesa_get_format_name(dst_format));
- return false;
- }
-
- /* The blitter has no idea about HiZ or fast color clears, so we need to
- * resolve the miptrees before we do anything.
- */
- brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
- brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);
-
- if (src_flip) {
- const unsigned h0 = src_mt->surf.phys_level0_sa.height;
- src_y = minify(h0, src_level - src_mt->first_level) - src_y - height;
- }
-
- if (dst_flip) {
- const unsigned h0 = dst_mt->surf.phys_level0_sa.height;
- dst_y = minify(h0, dst_level - dst_mt->first_level) - dst_y - height;
- }
-
- uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
- brw_miptree_get_image_offset(src_mt, src_level, src_slice,
- &src_image_x, &src_image_y);
- brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
- &dst_image_x, &dst_image_y);
- src_x += src_image_x;
- src_y += src_image_y;
- dst_x += dst_image_x;
- dst_y += dst_image_y;
-
- if (!emit_miptree_blit(brw, src_mt, src_x, src_y,
- dst_mt, dst_x, dst_y, width, height,
- src_flip != dst_flip, logicop)) {
- return false;
- }
-
- /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */
- if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 &&
- _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) {
- brw_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height);
- }
-
- return true;
-}
-
-bool
-brw_miptree_copy(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_level, int src_slice,
- uint32_t src_x, uint32_t src_y,
- struct brw_mipmap_tree *dst_mt,
- int dst_level, int dst_slice,
- uint32_t dst_x, uint32_t dst_y,
- uint32_t src_width, uint32_t src_height)
-{
- /* The blitter doesn't understand multisampling at all. */
- if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1)
- return false;
-
- if (src_mt->format == MESA_FORMAT_S_UINT8)
- return false;
-
- /* The blitter has no idea about HiZ or fast color clears, so we need to
- * resolve the miptrees before we do anything.
- */
- brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
- brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);
-
- uint32_t src_image_x, src_image_y;
- brw_miptree_get_image_offset(src_mt, src_level, src_slice,
- &src_image_x, &src_image_y);
-
- if (_mesa_is_format_compressed(src_mt->format)) {
- GLuint bw, bh;
- _mesa_get_format_block_size(src_mt->format, &bw, &bh);
-
- /* Compressed textures need not have dimensions that are a multiple of
- * the block size. Rectangles in compressed textures do need to be a
- * multiple of the block size. The one exception is that the right and
- * bottom edges may be at the right or bottom edge of the miplevel even
- * if it's not aligned.
- */
- assert(src_x % bw == 0);
- assert(src_y % bh == 0);
-
- assert(src_width % bw == 0 ||
- src_x + src_width ==
- minify(src_mt->surf.logical_level0_px.width, src_level));
- assert(src_height % bh == 0 ||
- src_y + src_height ==
- minify(src_mt->surf.logical_level0_px.height, src_level));
-
- src_x /= (int)bw;
- src_y /= (int)bh;
- src_width = DIV_ROUND_UP(src_width, (int)bw);
- src_height = DIV_ROUND_UP(src_height, (int)bh);
- }
- src_x += src_image_x;
- src_y += src_image_y;
-
- uint32_t dst_image_x, dst_image_y;
- brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
- &dst_image_x, &dst_image_y);
-
- if (_mesa_is_format_compressed(dst_mt->format)) {
- GLuint bw, bh;
- _mesa_get_format_block_size(dst_mt->format, &bw, &bh);
-
- assert(dst_x % bw == 0);
- assert(dst_y % bh == 0);
-
- dst_x /= (int)bw;
- dst_y /= (int)bh;
- }
- dst_x += dst_image_x;
- dst_y += dst_image_y;
-
- return emit_miptree_blit(brw, src_mt, src_x, src_y,
- dst_mt, dst_x, dst_y,
- src_width, src_height, false, COLOR_LOGICOP_COPY);
-}
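As a concrete example of the compressed-format adjustment above: for a DXT1 source (4x4 blocks) with src_x = 8, src_y = 4 and src_width = 10 ending at the right edge of the miplevel, the rectangle becomes src_x = 2, src_y = 1 and src_width = DIV_ROUND_UP(10, 4) = 3 in block units, and only then is the image offset added.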
-
-bool
-brw_emit_immediate_color_expand_blit(struct brw_context *brw,
- GLuint cpp,
- GLubyte *src_bits, GLuint src_size,
- GLuint fg_color,
- GLshort dst_pitch,
- struct brw_bo *dst_buffer,
- GLuint dst_offset,
- enum isl_tiling dst_tiling,
- GLshort x, GLshort y,
- GLshort w, GLshort h,
- enum gl_logicop_mode logic_op)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- int dwords = ALIGN(src_size, 8) / 4;
- uint32_t opcode, br13, blit_cmd;
-
- if (dst_tiling != ISL_TILING_LINEAR) {
- if (dst_offset & 4095)
- return false;
- if (dst_tiling == ISL_TILING_Y0)
- return false;
- }
-
- assert((unsigned) logic_op <= 0x0f);
- assert(dst_pitch > 0);
-
- if (w < 0 || h < 0)
- return true;
-
- DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
- __func__,
- dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
-
- unsigned xy_setup_blt_length = devinfo->ver >= 8 ? 10 : 8;
- brw_batch_require_space(brw, (xy_setup_blt_length * 4) +
- (3 * 4) + dwords * 4);
-
- opcode = XY_SETUP_BLT_CMD;
- if (cpp == 4)
- opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- if (dst_tiling != ISL_TILING_LINEAR) {
- opcode |= XY_DST_TILED;
- dst_pitch /= 4;
- }
-
- br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
- br13 |= br13_for_cpp(cpp);
-
- blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
- if (dst_tiling != ISL_TILING_LINEAR)
- blit_cmd |= XY_DST_TILED;
-
- BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
- OUT_BATCH(opcode | (xy_setup_blt_length - 2));
- OUT_BATCH(br13);
- OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
- OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
- if (devinfo->ver >= 8) {
- OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
- } else {
- OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
- }
- OUT_BATCH(0); /* bg */
- OUT_BATCH(fg_color); /* fg */
- OUT_BATCH(0); /* pattern base addr */
- if (devinfo->ver >= 8)
- OUT_BATCH(0);
-
- OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
- OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X));
- OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
- ADVANCE_BATCH();
-
- brw_batch_data(brw, src_bits, dwords * 4);
-
- brw_emit_mi_flush(brw);
-
- return true;
-}
-
-/**
- * Used to initialize the alpha value of an ARGB8888 miptree after copying
- * into it from an XRGB8888 source.
- *
- * This is very common with glCopyTexImage2D(). Note that the coordinates are
- * relative to the start of the miptree, not relative to a slice within the
- * miptree.
- */
-static void
-brw_miptree_set_alpha_to_one(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- int x, int y, int width, int height)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t BR13, CMD;
- int pitch, cpp;
-
- pitch = mt->surf.row_pitch_B;
- cpp = mt->cpp;
-
- DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
- __func__, mt->bo, pitch, x, y, width, height);
-
- /* Note: Currently this only handles an 8 bit alpha channel. Extension to a
- * < 8 bit alpha channel would likely be possible via ROP code 0xfa instead of 0xf0
- * and writing a suitable bit-mask instead of 0xffffffff.
- */
- BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
- CMD = XY_COLOR_BLT_CMD;
- CMD |= XY_BLT_WRITE_ALPHA;
-
- if (mt->surf.tiling != ISL_TILING_LINEAR) {
- CMD |= XY_DST_TILED;
- pitch /= 4;
- }
- BR13 |= pitch;
-
- /* do space check before going any further */
- if (!brw_batch_has_aperture_space(brw, mt->bo->size))
- brw_batch_flush(brw);
-
- unsigned length = devinfo->ver >= 8 ? 7 : 6;
- const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0;
-
- /* We need to split the blit into chunks that each fit within the blitter's
- * restrictions. We can't use a chunk size of 32768 because we need to
- * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
- * a nice round power of two, big enough that performance won't suffer, and
- * small enough to guarantee everything fits.
- */
- const uint32_t max_chunk_size = 16384;
-
- for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
- for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
- const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
- const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
-
- uint64_t offset_B;
- uint32_t tile_x, tile_y;
- get_blit_intratile_offset_el(brw, mt,
- x + chunk_x, y + chunk_y,
- &offset_B, &tile_x, &tile_y);
-
- BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false);
- OUT_BATCH(CMD | (length - 2));
- OUT_BATCH(BR13);
- OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) |
- SET_FIELD(x + chunk_x, BLT_X));
- OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) |
- SET_FIELD(x + chunk_x + chunk_w, BLT_X));
- if (devinfo->ver >= 8) {
- OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset_B);
- } else {
- OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset_B);
- }
- OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
- ADVANCE_BATCH_TILED(dst_y_tiled, false);
- }
- }
-
- brw_emit_mi_flush(brw);
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_BLIT_H
-#define BRW_BLIT_H
-
-#include "brw_context.h"
-
-bool brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);
-
-bool brw_miptree_blit(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_level, int src_slice,
- uint32_t src_x, uint32_t src_y, bool src_flip,
- struct brw_mipmap_tree *dst_mt,
- int dst_level, int dst_slice,
- uint32_t dst_x, uint32_t dst_y, bool dst_flip,
- uint32_t width, uint32_t height,
- enum gl_logicop_mode logicop);
-
-bool brw_miptree_copy(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_level, int src_slice,
- uint32_t src_x, uint32_t src_y,
- struct brw_mipmap_tree *dst_mt,
- int dst_level, int dst_slice,
- uint32_t dst_x, uint32_t dst_y,
- uint32_t src_width, uint32_t src_height);
-
-bool
-brw_emit_immediate_color_expand_blit(struct brw_context *brw,
- GLuint cpp,
- GLubyte *src_bits, GLuint src_size,
- GLuint fg_color,
- GLshort dst_pitch,
- struct brw_bo *dst_buffer,
- GLuint dst_offset,
- enum isl_tiling dst_tiling,
- GLshort x, GLshort y,
- GLshort w, GLshort h,
- enum gl_logicop_mode logic_op);
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/context.h"
-#include "main/teximage.h"
-#include "main/blend.h"
-#include "main/bufferobj.h"
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/image.h"
-#include "main/renderbuffer.h"
-#include "main/glformats.h"
-
-#include "brw_blorp.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_meta_util.h"
-#include "brw_state.h"
-#include "brw_buffer_objects.h"
-#include "brw_fbo.h"
-#include "dev/intel_debug.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLORP
-
-static bool
-brw_blorp_lookup_shader(struct blorp_batch *batch,
- const void *key, uint32_t key_size,
- uint32_t *kernel_out, void *prog_data_out)
-{
- struct brw_context *brw = batch->driver_batch;
- return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size,
- kernel_out, prog_data_out, true);
-}
-
-static bool
-brw_blorp_upload_shader(struct blorp_batch *batch, uint32_t stage,
- const void *key, uint32_t key_size,
- const void *kernel, uint32_t kernel_size,
- const struct brw_stage_prog_data *prog_data,
- uint32_t prog_data_size,
- uint32_t *kernel_out, void *prog_data_out)
-{
- struct brw_context *brw = batch->driver_batch;
- brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size,
- kernel, kernel_size, prog_data, prog_data_size,
- kernel_out, prog_data_out);
- return true;
-}
-
-void
-brw_blorp_init(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- blorp_init(&brw->blorp, brw, &brw->isl_dev, NULL);
-
- brw->blorp.compiler = brw->screen->compiler;
-
- switch (devinfo->ver) {
- case 4:
- if (devinfo->verx10 == 45) {
- brw->blorp.exec = gfx45_blorp_exec;
- } else {
- brw->blorp.exec = gfx4_blorp_exec;
- }
- break;
- case 5:
- brw->blorp.exec = gfx5_blorp_exec;
- break;
- case 6:
- brw->blorp.exec = gfx6_blorp_exec;
- break;
- case 7:
- if (devinfo->verx10 == 75) {
- brw->blorp.exec = gfx75_blorp_exec;
- } else {
- brw->blorp.exec = gfx7_blorp_exec;
- }
- break;
- case 8:
- brw->blorp.exec = gfx8_blorp_exec;
- break;
- case 9:
- brw->blorp.exec = gfx9_blorp_exec;
- break;
- case 11:
- brw->blorp.exec = gfx11_blorp_exec;
- break;
-
- default:
- unreachable("Invalid gen");
- }
-
- brw->blorp.lookup_shader = brw_blorp_lookup_shader;
- brw->blorp.upload_shader = brw_blorp_upload_shader;
-}
-
-static void
-blorp_surf_for_miptree(struct brw_context *brw,
- struct blorp_surf *surf,
- const struct brw_mipmap_tree *mt,
- enum isl_aux_usage aux_usage,
- bool is_render_target,
- unsigned *level,
- unsigned start_layer, unsigned num_layers)
-{
- if (mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) {
- const unsigned num_samples = mt->surf.samples;
- for (unsigned i = 0; i < num_layers; i++) {
- for (unsigned s = 0; s < num_samples; s++) {
- const unsigned phys_layer = (start_layer + i) * num_samples + s;
- brw_miptree_check_level_layer(mt, *level, phys_layer);
- }
- }
- } else {
- for (unsigned i = 0; i < num_layers; i++)
- brw_miptree_check_level_layer(mt, *level, start_layer + i);
- }
-
- *surf = (struct blorp_surf) {
- .surf = &mt->surf,
- .addr = (struct blorp_address) {
- .buffer = mt->bo,
- .offset = mt->offset,
- .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
- .mocs = brw_mocs(&brw->isl_dev, mt->bo),
- },
- .aux_usage = aux_usage,
- .tile_x_sa = mt->level[*level].level_x,
- .tile_y_sa = mt->level[*level].level_y,
- };
-
- if (surf->aux_usage == ISL_AUX_USAGE_HIZ &&
- !brw_miptree_level_has_hiz(mt, *level))
- surf->aux_usage = ISL_AUX_USAGE_NONE;
-
- if (surf->aux_usage != ISL_AUX_USAGE_NONE) {
- /* We only really need a clear color if we also have an auxiliary
- * surface. Without one, it does nothing.
- */
- surf->clear_color =
- brw_miptree_get_clear_color(mt, (struct brw_bo **)
- &surf->clear_color_addr.buffer,
- &surf->clear_color_addr.offset);
-
- surf->aux_surf = &mt->aux_buf->surf;
- surf->aux_addr = (struct blorp_address) {
- .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
- .mocs = surf->addr.mocs,
- };
-
- surf->aux_addr.buffer = mt->aux_buf->bo;
- surf->aux_addr.offset = mt->aux_buf->offset;
- } else {
- surf->aux_addr = (struct blorp_address) {
- .buffer = NULL,
- };
- memset(&surf->clear_color, 0, sizeof(surf->clear_color));
- }
- assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
- (surf->aux_addr.buffer == NULL));
-
- if (!is_render_target && brw->screen->devinfo.ver == 9)
- gfx9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage);
-
- /* ISL wants real levels, not offset ones. */
- *level -= mt->first_level;
-}
-
-static bool
-brw_blorp_supports_dst_format(struct brw_context *brw, mesa_format format)
-{
- /* If it's renderable, it's definitely supported. */
- if (brw->mesa_format_supports_render[format])
- return true;
-
- /* BLORP can't compress anything */
- if (_mesa_is_format_compressed(format))
- return false;
-
- /* No exotic formats such as GL_LUMINANCE_ALPHA */
- if (_mesa_get_format_bits(format, GL_RED_BITS) == 0 &&
- _mesa_get_format_bits(format, GL_DEPTH_BITS) == 0 &&
- _mesa_get_format_bits(format, GL_STENCIL_BITS) == 0)
- return false;
-
- return true;
-}
-
-static enum isl_format
-brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
- bool is_render_target)
-{
- switch (format) {
- case MESA_FORMAT_NONE:
- return ISL_FORMAT_UNSUPPORTED;
- case MESA_FORMAT_S_UINT8:
- return ISL_FORMAT_R8_UINT;
- case MESA_FORMAT_Z24_UNORM_X8_UINT:
- case MESA_FORMAT_Z24_UNORM_S8_UINT:
- return ISL_FORMAT_R24_UNORM_X8_TYPELESS;
- case MESA_FORMAT_Z_FLOAT32:
- case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
- return ISL_FORMAT_R32_FLOAT;
- case MESA_FORMAT_Z_UNORM16:
- return ISL_FORMAT_R16_UNORM;
- default:
- if (is_render_target) {
- assert(brw_blorp_supports_dst_format(brw, format));
- if (brw->mesa_format_supports_render[format]) {
- return brw->mesa_to_isl_render_format[format];
- } else {
- /* Some destinations (is_render_target == true) are supported by
- * blorp even though we technically can't render to them.
- */
- return brw_isl_format_for_mesa_format(format);
- }
- } else {
- return brw_isl_format_for_mesa_format(format);
- }
- }
-}
-
-/**
- * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gfx7.5+
- * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
- *
- * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
- * 0 1 2 3 4 5
- * 4 5 6 7 0 1
- * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
- *
- * which is simply adding 4 then modding by 8 (or anding with 7).
- *
- * We then may need to apply workarounds for textureGather hardware bugs.
- */
-static enum isl_channel_select
-swizzle_to_scs(GLenum swizzle)
-{
- return (enum isl_channel_select)((swizzle + 4) & 7);
-}
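Spot-checking the arithmetic against the mapping listed above (illustrative asserts):

   assert(swizzle_to_scs(SWIZZLE_X)    == 4);   /* SCS_RED   */
   assert(swizzle_to_scs(SWIZZLE_W)    == 7);   /* SCS_ALPHA */
   assert(swizzle_to_scs(SWIZZLE_ZERO) == 0);   /* SCS_ZERO  */
   assert(swizzle_to_scs(SWIZZLE_ONE)  == 1);   /* SCS_ONE   */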
-
-/**
- * Note: if the src (or dst) is a 2D multisample array texture on Gfx7+ using
- * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
- * the physical layer holding sample 0. So, for example, if
- * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
- * 4*n.
- */
-void
-brw_blorp_blit_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_layer,
- mesa_format src_format, int src_swizzle,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_layer,
- mesa_format dst_format,
- float src_x0, float src_y0,
- float src_x1, float src_y1,
- float dst_x0, float dst_y0,
- float dst_x1, float dst_y1,
- GLenum gl_filter, bool mirror_x, bool mirror_y,
- bool decode_srgb, bool encode_srgb)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f) "
- "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
- __func__,
- src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt,
- src_level, src_layer, src_x0, src_y0, src_x1, src_y1,
- dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt,
- dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1,
- mirror_x, mirror_y);
-
- if (src_format == MESA_FORMAT_NONE)
- src_format = src_mt->format;
-
- if (dst_format == MESA_FORMAT_NONE)
- dst_format = dst_mt->format;
-
- if (!decode_srgb)
- src_format = _mesa_get_srgb_format_linear(src_format);
-
- if (!encode_srgb)
- dst_format = _mesa_get_srgb_format_linear(dst_format);
-
- /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F
- * texture, the above code configures the source format for L32_FLOAT or
- * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge,
- * the SAMPLE message appears to handle multisampled L32_FLOAT and
- * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work
- * around the problem by using a source format of R32_FLOAT. This
- * shouldn't affect rendering correctness, since the destination format is
- * R32_FLOAT, so only the contents of the red channel matters.
- */
- if (devinfo->ver == 6 &&
- src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1 &&
- src_mt->format == dst_mt->format &&
- (dst_format == MESA_FORMAT_L_FLOAT32 ||
- dst_format == MESA_FORMAT_I_FLOAT32)) {
- src_format = dst_format = MESA_FORMAT_R_FLOAT32;
- }
-
- enum blorp_filter blorp_filter;
- if (fabsf(dst_x1 - dst_x0) == fabsf(src_x1 - src_x0) &&
- fabsf(dst_y1 - dst_y0) == fabsf(src_y1 - src_y0)) {
- if (src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1) {
- /* From the OpenGL ES 3.2 specification, section 16.2.1:
- *
- * "If the read framebuffer is multisampled (its effective value
- * of SAMPLE_BUFFERS is one) and the draw framebuffer is not (its
- * value of SAMPLE_BUFFERS is zero), the samples corresponding to
- * each pixel location in the source are converted to a single
- * sample before being written to the destination. The filter
- * parameter is ignored. If the source formats are integer types
- * or stencil values, a single sample’s value is selected for each
- * pixel. If the source formats are floating-point or normalized
- * types, the sample values for each pixel are resolved in an
- * implementation-dependent manner. If the source formats are
- * depth values, sample values are resolved in an implementation-
- * dependent manner where the result will be between the minimum
- * and maximum depth values in the pixel."
- *
- * For depth and stencil resolves, we choose to always use the value
- * at sample 0.
- */
- GLenum base_format = _mesa_get_format_base_format(src_mt->format);
- if (base_format == GL_DEPTH_COMPONENT ||
- base_format == GL_STENCIL_INDEX ||
- base_format == GL_DEPTH_STENCIL ||
- _mesa_is_format_integer(src_mt->format)) {
- /* The OpenGL ES 3.2 spec says:
- *
- * "If the source formats are integer types or stencil values,
- * a single sample's value is selected for each pixel."
- *
- * Just take sample 0 in this case.
- */
- blorp_filter = BLORP_FILTER_SAMPLE_0;
- } else {
- blorp_filter = BLORP_FILTER_AVERAGE;
- }
- } else {
- /* From the OpenGL 4.6 specification, section 18.3.1:
- *
- * "If the source and destination dimensions are identical, no
- * filtering is applied."
- *
- * Using BLORP_FILTER_NONE will also handle the upsample case by
- * replicating the one value in the source to all values in the
- * destination.
- */
- blorp_filter = BLORP_FILTER_NONE;
- }
- } else if (gl_filter == GL_LINEAR ||
- gl_filter == GL_SCALED_RESOLVE_FASTEST_EXT ||
- gl_filter == GL_SCALED_RESOLVE_NICEST_EXT) {
- blorp_filter = BLORP_FILTER_BILINEAR;
- } else {
- blorp_filter = BLORP_FILTER_NEAREST;
- }
-
- enum isl_format src_isl_format =
- brw_blorp_to_isl_format(brw, src_format, false);
- enum isl_aux_usage src_aux_usage =
- brw_miptree_texture_aux_usage(brw, src_mt, src_isl_format,
- 0 /* The astc5x5 WA isn't needed */);
- /* We do format workarounds for some depth formats so we can't reliably
- * sample with HiZ. One of these days, we should fix that.
- */
- if (src_aux_usage == ISL_AUX_USAGE_HIZ && src_mt->format != src_format)
- src_aux_usage = ISL_AUX_USAGE_NONE;
- const bool src_clear_supported =
- src_aux_usage != ISL_AUX_USAGE_NONE && src_mt->format == src_format;
- brw_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
- src_aux_usage, src_clear_supported);
-
- enum isl_format dst_isl_format =
- brw_blorp_to_isl_format(brw, dst_format, true);
- enum isl_aux_usage dst_aux_usage =
- brw_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, false, false);
- const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE;
- brw_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
- dst_aux_usage, dst_clear_supported);
-
- struct blorp_surf src_surf, dst_surf;
- blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false,
- &src_level, src_layer, 1);
- blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true,
- &dst_level, dst_layer, 1);
-
- struct isl_swizzle src_isl_swizzle = {
- .r = swizzle_to_scs(GET_SWZ(src_swizzle, 0)),
- .g = swizzle_to_scs(GET_SWZ(src_swizzle, 1)),
- .b = swizzle_to_scs(GET_SWZ(src_swizzle, 2)),
- .a = swizzle_to_scs(GET_SWZ(src_swizzle, 3)),
- };
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_blit(&batch, &src_surf, src_level, src_layer,
- src_isl_format, src_isl_swizzle,
- &dst_surf, dst_level, dst_layer,
- dst_isl_format, ISL_SWIZZLE_IDENTITY,
- src_x0, src_y0, src_x1, src_y1,
- dst_x0, dst_y0, dst_x1, dst_y1,
- blorp_filter, mirror_x, mirror_y);
- blorp_batch_finish(&batch);
-
- brw_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1,
- dst_aux_usage);
-}
-
-void
-brw_blorp_copy_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_layer,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_layer,
- unsigned src_x, unsigned src_y,
- unsigned dst_x, unsigned dst_y,
- unsigned src_width, unsigned src_height)
-{
- DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d"
- "to %dx %s mt %p %d %d (%d,%d)\n",
- __func__,
- src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt,
- src_level, src_layer, src_x, src_y, src_width, src_height,
- dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt,
- dst_level, dst_layer, dst_x, dst_y);
-
- enum isl_aux_usage src_aux_usage, dst_aux_usage;
- bool src_clear_supported, dst_clear_supported;
-
- switch (src_mt->aux_usage) {
- case ISL_AUX_USAGE_HIZ:
- if (brw_miptree_sample_with_hiz(brw, src_mt)) {
- src_aux_usage = src_mt->aux_usage;
- src_clear_supported = true;
- } else {
- src_aux_usage = ISL_AUX_USAGE_NONE;
- src_clear_supported = false;
- }
- break;
- case ISL_AUX_USAGE_MCS:
- case ISL_AUX_USAGE_CCS_E:
- src_aux_usage = src_mt->aux_usage;
- src_clear_supported = false;
- break;
- default:
- src_aux_usage = ISL_AUX_USAGE_NONE;
- src_clear_supported = false;
- break;
- }
-
- switch (dst_mt->aux_usage) {
- case ISL_AUX_USAGE_MCS:
- case ISL_AUX_USAGE_CCS_E:
- dst_aux_usage = dst_mt->aux_usage;
- dst_clear_supported = false;
- break;
- default:
- dst_aux_usage = ISL_AUX_USAGE_NONE;
- dst_clear_supported = false;
- break;
- }
-
- brw_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
- src_aux_usage, src_clear_supported);
- brw_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
- dst_aux_usage, dst_clear_supported);
-
- struct blorp_surf src_surf, dst_surf;
- blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false,
- &src_level, src_layer, 1);
- blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true,
- &dst_level, dst_layer, 1);
-
- /* The hardware seems to have issues with having two different format
- * views of the same texture in the sampler cache at the same time. It's
- * unclear exactly what the issue is, but it hurts glCopyImageSubData
- * particularly badly because it does a lot of format reinterprets. We
- * badly need a better understanding of the issue and a better fix, but
- * this works for now and fixes CTS tests.
- *
- * TODO: Remove this hack!
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_copy(&batch, &src_surf, src_level, src_layer,
- &dst_surf, dst_level, dst_layer,
- src_x, src_y, dst_x, dst_y, src_width, src_height);
- blorp_batch_finish(&batch);
-
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
-
- brw_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1,
- dst_aux_usage);
-}
-
-void
-brw_blorp_copy_buffers(struct brw_context *brw,
- struct brw_bo *src_bo,
- unsigned src_offset,
- struct brw_bo *dst_bo,
- unsigned dst_offset,
- unsigned size)
-{
- DBG("%s %d bytes from %p[%d] to %p[%d]",
- __func__, size, src_bo, src_offset, dst_bo, dst_offset);
-
- struct blorp_batch batch;
- struct blorp_address src = {
- .buffer = src_bo, .offset = src_offset,
- .mocs = brw_mocs(&brw->isl_dev, src_bo),
- };
- struct blorp_address dst = {
- .buffer = dst_bo, .offset = dst_offset,
- .mocs = brw_mocs(&brw->isl_dev, dst_bo),
- };
-
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_buffer_copy(&batch, src, dst, size);
- blorp_batch_finish(&batch);
-}
-
-
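-/* Stencil data may live in a separate stencil miptree; return that one when
- * the blit targets the stencil buffer.
- */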
-static struct brw_mipmap_tree *
-find_miptree(GLbitfield buffer_bit, struct brw_renderbuffer *irb)
-{
- struct brw_mipmap_tree *mt = irb->mt;
- if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt)
- mt = mt->stencil_mt;
- return mt;
-}
-
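-/* GL_RGB renderbuffers may be backed by an RGBA surface, so force the alpha
- * channel to read as 1.0; otherwise pass the components through unchanged.
- */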
-static int
-blorp_get_texture_swizzle(const struct brw_renderbuffer *irb)
-{
- return irb->Base.Base._BaseFormat == GL_RGB ?
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE) :
- SWIZZLE_XYZW;
-}
-
-static void
-do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
- struct brw_renderbuffer *src_irb, mesa_format src_format,
- struct brw_renderbuffer *dst_irb, mesa_format dst_format,
- GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
- GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
- GLenum filter, bool mirror_x, bool mirror_y)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* Find source/dst miptrees */
- struct brw_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
- struct brw_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
-
- const bool do_srgb = ctx->Color.sRGBEnabled;
-
- /* Do the blit */
- brw_blorp_blit_miptrees(brw,
- src_mt, src_irb->mt_level, src_irb->mt_layer,
- src_format, blorp_get_texture_swizzle(src_irb),
- dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
- dst_format,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- filter, mirror_x, mirror_y,
- do_srgb, do_srgb);
-
- dst_irb->need_downsample = true;
-}
-
-static bool
-try_blorp_blit(struct brw_context *brw,
- const struct gl_framebuffer *read_fb,
- const struct gl_framebuffer *draw_fb,
- GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
- GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
- GLenum filter, GLbitfield buffer_bit)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
-
- /* Sync up the state of window system buffers. We need to do this before
- * we go looking for the buffers.
- */
- brw_prepare_render(brw);
-
- bool mirror_x, mirror_y;
- if (brw_meta_mirror_clip_and_scissor(ctx, read_fb, draw_fb,
- &srcX0, &srcY0, &srcX1, &srcY1,
- &dstX0, &dstY0, &dstX1, &dstY1,
- &mirror_x, &mirror_y))
- return true;
-
- /* Find buffers */
- struct brw_renderbuffer *src_irb;
- struct brw_renderbuffer *dst_irb;
- struct brw_mipmap_tree *src_mt;
- struct brw_mipmap_tree *dst_mt;
- switch (buffer_bit) {
- case GL_COLOR_BUFFER_BIT:
- src_irb = brw_renderbuffer(read_fb->_ColorReadBuffer);
- for (unsigned i = 0; i < draw_fb->_NumColorDrawBuffers; ++i) {
- dst_irb = brw_renderbuffer(draw_fb->_ColorDrawBuffers[i]);
- if (dst_irb)
- do_blorp_blit(brw, buffer_bit,
- src_irb, src_irb->Base.Base.Format,
- dst_irb, dst_irb->Base.Base.Format,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- filter, mirror_x, mirror_y);
- }
- break;
- case GL_DEPTH_BUFFER_BIT:
- src_irb =
- brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
- dst_irb =
- brw_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
- src_mt = find_miptree(buffer_bit, src_irb);
- dst_mt = find_miptree(buffer_bit, dst_irb);
-
- /* We also can't handle any combined depth-stencil formats because we
- * have to reinterpret as a color format.
- */
- if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL ||
- _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL)
- return false;
-
- do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE,
- dst_irb, MESA_FORMAT_NONE, srcX0, srcY0,
- srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
- filter, mirror_x, mirror_y);
- break;
- case GL_STENCIL_BUFFER_BIT:
- /* Blorp doesn't support combined depth stencil, which is all we have
- * prior to gfx6.
- */
- if (devinfo->ver < 6)
- return false;
-
- src_irb =
- brw_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
- dst_irb =
- brw_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
- do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE,
- dst_irb, MESA_FORMAT_NONE, srcX0, srcY0,
- srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
- filter, mirror_x, mirror_y);
- break;
- default:
- unreachable("not reached");
- }
-
- return true;
-}
-
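-/* Flip a y range within a surface of the given height. For example, with
- * height = 100, (y0, y1) = (10, 30) becomes (70, 90): the range keeps its
- * extent but is measured from the opposite edge.
- */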
-static void
-apply_y_flip(int *y0, int *y1, int height)
-{
- int tmp = height - *y0;
- *y0 = height - *y1;
- *y1 = tmp;
-}
-
-bool
-brw_blorp_copytexsubimage(struct brw_context *brw,
- struct gl_renderbuffer *src_rb,
- struct gl_texture_image *dst_image,
- int slice,
- int srcX0, int srcY0,
- int dstX0, int dstY0,
- int width, int height)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb);
- struct brw_texture_image *intel_image = brw_texture_image(dst_image);
-
- /* No pixel transfer operations (zoom, bias, mapping), just a blit */
- if (brw->ctx._ImageTransferState)
- return false;
-
- /* Sync up the state of window system buffers. We need to do this before
- * we go looking at the src renderbuffer's miptree.
- */
- brw_prepare_render(brw);
-
- struct brw_mipmap_tree *src_mt = src_irb->mt;
- struct brw_mipmap_tree *dst_mt = intel_image->mt;
-
- /* We can't handle any combined depth-stencil formats because we have to
- * reinterpret as a color format.
- */
- if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL ||
- _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL)
- return false;
-
- if (!brw_blorp_supports_dst_format(brw, dst_image->TexFormat))
- return false;
-
- /* Source clipping shouldn't be necessary, since copytexsubimage (in
- * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
- * takes care of it.
- *
- * Destination clipping shouldn't be necessary since the restrictions on
- * glCopyTexSubImage prevent the user from specifying a destination rectangle
- * that falls outside the bounds of the destination texture.
- * See error_check_subtexture_dimensions().
- */
-
- int srcY1 = srcY0 + height;
- int srcX1 = srcX0 + width;
- int dstX1 = dstX0 + width;
- int dstY1 = dstY0 + height;
-
- /* Account for the fact that in the system framebuffer, the origin is at
- * the lower left.
- */
- bool mirror_y = ctx->ReadBuffer->FlipY;
- if (mirror_y)
- apply_y_flip(&srcY0, &srcY1, src_rb->Height);
-
- /* Account for face selection and texture view MinLayer */
- int dst_slice = slice + dst_image->TexObject->Attrib.MinLayer + dst_image->Face;
- int dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel;
-
- brw_blorp_blit_miptrees(brw,
- src_mt, src_irb->mt_level, src_irb->mt_layer,
- src_rb->Format, blorp_get_texture_swizzle(src_irb),
- dst_mt, dst_level, dst_slice,
- dst_image->TexFormat,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- GL_NEAREST, false, mirror_y,
- false, false);
-
- /* If we're copying to a packed depth stencil texture and the source
- * framebuffer has separate stencil, we need to also copy the stencil data
- * over.
- */
- src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
- if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 &&
- src_rb != NULL) {
- src_irb = brw_renderbuffer(src_rb);
- src_mt = src_irb->mt;
-
- if (src_mt->stencil_mt)
- src_mt = src_mt->stencil_mt;
- if (dst_mt->stencil_mt)
- dst_mt = dst_mt->stencil_mt;
-
- if (src_mt != dst_mt) {
- brw_blorp_blit_miptrees(brw,
- src_mt, src_irb->mt_level, src_irb->mt_layer,
- src_mt->format,
- blorp_get_texture_swizzle(src_irb),
- dst_mt, dst_level, dst_slice,
- dst_mt->format,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- GL_NEAREST, false, mirror_y,
- false, false);
- }
- }
-
- return true;
-}
-
-
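-/* Handle a glBlitFramebuffer for each requested buffer with BLORP, returning
- * the subset of 'mask' that could not be handled so the caller can fall back
- * to another path.
- */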
-GLbitfield
-brw_blorp_framebuffer(struct brw_context *brw,
- struct gl_framebuffer *readFb,
- struct gl_framebuffer *drawFb,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter)
-{
- static GLbitfield buffer_bits[] = {
- GL_COLOR_BUFFER_BIT,
- GL_DEPTH_BUFFER_BIT,
- GL_STENCIL_BUFFER_BIT,
- };
-
- for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) {
- if ((mask & buffer_bits[i]) &&
- try_blorp_blit(brw, readFb, drawFb,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- filter, buffer_bits[i])) {
- mask &= ~buffer_bits[i];
- }
- }
-
- /* try_blorp_blit should always be successful for color blits. */
- assert(!(mask & GL_COLOR_BUFFER_BIT));
- return mask;
-}
-
-static struct brw_bo *
-blorp_get_client_bo(struct brw_context *brw,
- unsigned w, unsigned h, unsigned d,
- GLenum target, GLenum format, GLenum type,
- const void *pixels,
- const struct gl_pixelstore_attrib *packing,
- uint32_t *offset_out, uint32_t *row_stride_out,
- uint32_t *image_stride_out, bool read_only)
-{
- /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
- const GLuint dims = _mesa_get_texture_dimensions(target);
- const uint32_t first_pixel = _mesa_image_offset(dims, packing, w, h,
- format, type, 0, 0, 0);
- const uint32_t last_pixel = _mesa_image_offset(dims, packing, w, h,
- format, type,
- d - 1, h - 1, w);
- const uint32_t stride = _mesa_image_row_stride(packing, w, format, type);
- const uint32_t size = last_pixel - first_pixel;
-
- *row_stride_out = stride;
- *image_stride_out = _mesa_image_image_stride(packing, w, h, format, type);
-
- if (packing->BufferObj) {
- const uint32_t offset = first_pixel + (intptr_t)pixels;
-
- if (!read_only) {
- const int32_t cpp = _mesa_bytes_per_pixel(format, type);
- assert(cpp > 0);
-
- if ((offset % cpp) || (stride % cpp)) {
- perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
- return NULL;
- }
- }
-
- /* This is a user-provided PBO. We just need to get the BO out */
- struct brw_buffer_object *intel_pbo =
- brw_buffer_object(packing->BufferObj);
- struct brw_bo *bo =
- brw_bufferobj_buffer(brw, intel_pbo, offset, size, !read_only);
-
- /* We take a reference to the BO so that the caller can just always
- * unref without having to worry about whether it's a user PBO or one
- * we created.
- */
- brw_bo_reference(bo);
-
- *offset_out = offset;
- return bo;
- } else {
- /* Someone should have already checked that there is data to upload. */
- assert(pixels);
-
- /* Creating a temp buffer currently only works for upload */
- assert(read_only);
-
- /* This is not a user-provided PBO. Instead, pixels is a pointer to CPU
- * data which we need to copy into a BO.
- */
- struct brw_bo *bo =
- brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size,
- BRW_MEMZONE_OTHER);
- if (bo == NULL) {
- perf_debug("%s: temp bo creation failed: size = %u\n", __func__,
- size);
- return NULL;
- }
-
- if (brw_bo_subdata(bo, 0, size, pixels + first_pixel)) {
- perf_debug("%s: temp bo upload failed\n", __func__);
- brw_bo_unreference(bo);
- return NULL;
- }
-
- *offset_out = 0;
- return bo;
- }
-}
-
-/* Consider all the restrictions and determine the format of the source. */
-static mesa_format
-blorp_get_client_format(struct brw_context *brw,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *packing)
-{
- if (brw->ctx._ImageTransferState)
- return MESA_FORMAT_NONE;
-
- if (packing->SwapBytes || packing->LsbFirst || packing->Invert) {
- perf_debug("%s: unsupported gl_pixelstore_attrib\n", __func__);
- return MESA_FORMAT_NONE;
- }
-
- if (format != GL_RED &&
- format != GL_RG &&
- format != GL_RGB &&
- format != GL_BGR &&
- format != GL_RGBA &&
- format != GL_BGRA &&
- format != GL_ALPHA &&
- format != GL_RED_INTEGER &&
- format != GL_RG_INTEGER &&
- format != GL_RGB_INTEGER &&
- format != GL_BGR_INTEGER &&
- format != GL_RGBA_INTEGER &&
- format != GL_BGRA_INTEGER) {
- perf_debug("%s: %s not supported", __func__,
- _mesa_enum_to_string(format));
- return MESA_FORMAT_NONE;
- }
-
- return _mesa_tex_format_from_format_and_type(&brw->ctx, format, type);
-}
-
-bool
-brw_blorp_upload_miptree(struct brw_context *brw,
- struct brw_mipmap_tree *dst_mt,
- mesa_format dst_format,
- uint32_t level, uint32_t x, uint32_t y, uint32_t z,
- uint32_t width, uint32_t height, uint32_t depth,
- GLenum target, GLenum format, GLenum type,
- const void *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- const mesa_format src_format =
- blorp_get_client_format(brw, format, type, packing);
- if (src_format == MESA_FORMAT_NONE)
- return false;
-
- if (!brw->mesa_format_supports_render[dst_format]) {
- perf_debug("%s: can't use %s as render target\n", __func__,
- _mesa_get_format_name(dst_format));
- return false;
- }
-
- uint32_t src_offset, src_row_stride, src_image_stride;
- struct brw_bo *src_bo =
- blorp_get_client_bo(brw, width, height, depth,
- target, format, type, pixels, packing,
- &src_offset, &src_row_stride,
- &src_image_stride, true);
- if (src_bo == NULL)
- return false;
-
- /* Now that the source is offset to the correct starting point, adjust
- * the given dimensions to treat 1D arrays as 2D.
- */
- if (target == GL_TEXTURE_1D_ARRAY) {
- assert(depth == 1);
- assert(z == 0);
- depth = height;
- height = 1;
- z = y;
- y = 0;
- src_image_stride = src_row_stride;
- }
-
- brw_miptree_check_level_layer(dst_mt, level, z + depth - 1);
-
- bool result = false;
-
- /* Blit slice-by-slice, creating a single-slice miptree for each layer.
- * Even for linear buffers the hardware wants image arrays aligned to
- * four rows; this way the hardware only sees one image at a time and any
- * source alignment will do.
- */
- for (unsigned i = 0; i < depth; ++i) {
- struct brw_mipmap_tree *src_mt =
- brw_miptree_create_for_bo(brw, src_bo, src_format,
- src_offset + i * src_image_stride,
- width, height, 1,
- src_row_stride,
- ISL_TILING_LINEAR, 0);
-
- if (!src_mt) {
- perf_debug("%s: miptree creation for src failed\n", __func__);
- goto err;
- }
-
- /* When an exact match is needed, copy using equivalent UINT formats to
- * prevent the hardware from changing the representation of SNORM -1.
- */
- if (src_mt->format == dst_format) {
- brw_blorp_copy_miptrees(brw, src_mt, 0, 0,
- dst_mt, level, z + i,
- 0, 0, x, y, width, height);
- } else {
- brw_blorp_blit_miptrees(brw, src_mt, 0, 0,
- src_format, SWIZZLE_XYZW,
- dst_mt, level, z + i,
- dst_format,
- 0, 0, width, height,
- x, y, x + width, y + height,
- GL_NEAREST, false, false, false, false);
- }
-
- brw_miptree_release(&src_mt);
- }
-
- result = true;
-
-err:
- brw_bo_unreference(src_bo);
-
- return result;
-}
-
-bool
-brw_blorp_download_miptree(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- mesa_format src_format, uint32_t src_swizzle,
- uint32_t level, uint32_t x, uint32_t y, uint32_t z,
- uint32_t width, uint32_t height, uint32_t depth,
- GLenum target, GLenum format, GLenum type,
- bool y_flip, const void *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- const mesa_format dst_format =
- blorp_get_client_format(brw, format, type, packing);
- if (dst_format == MESA_FORMAT_NONE)
- return false;
-
- if (!brw->mesa_format_supports_render[dst_format]) {
- perf_debug("%s: can't use %s as render target\n", __func__,
- _mesa_get_format_name(dst_format));
- return false;
- }
-
- /* We can't fetch from LUMINANCE or INTENSITY as that would require a
- * non-trivial swizzle.
- */
- switch (_mesa_get_format_base_format(src_format)) {
- case GL_LUMINANCE:
- case GL_LUMINANCE_ALPHA:
- case GL_INTENSITY:
- return false;
- default:
- break;
- }
-
- /* This pass only works for PBOs */
- assert(packing->BufferObj);
-
- uint32_t dst_offset, dst_row_stride, dst_image_stride;
- struct brw_bo *dst_bo =
- blorp_get_client_bo(brw, width, height, depth,
- target, format, type, pixels, packing,
- &dst_offset, &dst_row_stride,
- &dst_image_stride, false);
- if (dst_bo == NULL)
- return false;
-
- /* Now that the destination is offset to the correct starting point,
- * adjust the given dimensions to treat 1D arrays as 2D.
- */
- if (target == GL_TEXTURE_1D_ARRAY) {
- assert(depth == 1);
- assert(z == 0);
- depth = height;
- height = 1;
- z = y;
- y = 0;
- dst_image_stride = dst_row_stride;
- }
-
- brw_miptree_check_level_layer(src_mt, level, z + depth - 1);
-
- int y0 = y;
- int y1 = y + height;
- if (y_flip) {
- apply_y_flip(&y0, &y1, minify(src_mt->surf.phys_level0_sa.height,
- level - src_mt->first_level));
- }
-
- bool result = false;
-
- /* Blit slice-by-slice, creating a single-slice miptree for each layer.
- * Even for linear buffers the hardware wants image arrays aligned to
- * four rows; this way the hardware only sees one image at a time and any
- * source alignment will do.
- */
- for (unsigned i = 0; i < depth; ++i) {
- struct brw_mipmap_tree *dst_mt =
- brw_miptree_create_for_bo(brw, dst_bo, dst_format,
- dst_offset + i * dst_image_stride,
- width, height, 1,
- dst_row_stride,
- ISL_TILING_LINEAR, 0);
-
- if (!dst_mt) {
- perf_debug("%s: miptree creation for src failed\n", __func__);
- goto err;
- }
-
- /* When an exact match is needed, copy using equivalent UINT formats to
- * prevent the hardware from changing the representation of SNORM -1.
- */
- if (dst_mt->format == src_format && !y_flip &&
- src_swizzle == SWIZZLE_XYZW) {
- brw_blorp_copy_miptrees(brw, src_mt, level, z + i,
- dst_mt, 0, 0,
- x, y, 0, 0, width, height);
- } else {
- brw_blorp_blit_miptrees(brw, src_mt, level, z + i,
- src_format, src_swizzle,
- dst_mt, 0, 0, dst_format,
- x, y0, x + width, y1,
- 0, 0, width, height,
- GL_NEAREST, false, y_flip, false, false);
- }
-
- brw_miptree_release(&dst_mt);
- }
-
- result = true;
-
- /* We implement PBO transfers by binding the user-provided BO as a fake
- * framebuffer and rendering to it. This breaks the GL invariant that
- * nothing renders to a buffer object, and can cause nondeterministic
- * corruption because the render cache is not coherent with the other
- * caches the BO may be bound to afterwards.
- *
- * This could be solved in the same way that we guarantee texture
- * coherency after a texture is attached to a framebuffer and
- * rendered to, but that would involve checking *all* BOs bound to
- * the pipeline for the case we need to emit a cache flush due to
- * previous rendering to any of them -- Including vertex, index,
- * uniform, atomic counter, shader image, transform feedback,
- * indirect draw buffers, etc.
- *
- * That would increase the per-draw call overhead even though it's
- * very unlikely that any of the BOs bound to the pipeline has been
- * rendered to via a PBO at any point, so it seems better to just
- * flush here unconditionally.
- */
- brw_emit_mi_flush(brw);
-
-err:
- brw_bo_unreference(dst_bo);
-
- return result;
-}
-
-static bool
-set_write_disables(const struct brw_renderbuffer *irb,
- const unsigned color_mask, uint8_t *color_write_disable)
-{
- /* Format information in the renderbuffer represents the requirements
- * given by the client. There are cases where the backing miptree uses,
- * for example, RGBA to represent RGBX. Since the client is only expecting
- * RGB we can treat alpha as not used and write whatever we like into it.
- */
- const GLenum base_format = irb->Base.Base._BaseFormat;
- const int components = _mesa_components_in_format(base_format);
- assert(components > 0);
- *color_write_disable = ~color_mask & BITFIELD_MASK(components);
- return *color_write_disable;
-}
-
-static void
-do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
- struct gl_renderbuffer *rb, unsigned buf,
- bool partial_clear, bool encode_srgb)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- uint32_t x0, x1, y0, y1;
-
- mesa_format format = irb->Base.Base.Format;
- if (!encode_srgb)
- format = _mesa_get_srgb_format_linear(format);
- enum isl_format isl_format = brw->mesa_to_isl_render_format[format];
-
- x0 = fb->_Xmin;
- x1 = fb->_Xmax;
- if (fb->FlipY) {
- y0 = rb->Height - fb->_Ymax;
- y1 = rb->Height - fb->_Ymin;
- } else {
- y0 = fb->_Ymin;
- y1 = fb->_Ymax;
- }
-
- /* If the clear region is empty, just return. */
- if (x0 == x1 || y0 == y1)
- return;
-
- bool can_fast_clear = !partial_clear;
-
- if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
- can_fast_clear = false;
-
- uint8_t color_write_disable = 0;
- if (set_write_disables(irb, GET_COLORMASK(ctx->Color.ColorMask, buf),
- &color_write_disable))
- can_fast_clear = false;
-
- /* We store clear colors as floats or uints as needed. If there are
- * texture views in play, the formats will not properly be respected
- * during resolves because the resolve operations only know about the
- * miptree and not the renderbuffer.
- */
- if (irb->Base.Base.Format != irb->mt->format)
- can_fast_clear = false;
-
- if (!irb->mt->supports_fast_clear ||
- !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor))
- can_fast_clear = false;
-
- /* Surface state can only record one fast clear color value. Therefore,
- * unless all levels/layers agree on the color, it can represent only a
- * single level/layer. Here it is reserved for the first slice
- * (level 0, layer 0).
- */
- if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer)
- can_fast_clear = false;
-
- unsigned level = irb->mt_level;
- const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
- /* If the MCS buffer hasn't been allocated yet, we need to allocate it now.
- */
- if (can_fast_clear && !irb->mt->aux_buf) {
- assert(irb->mt->aux_usage == ISL_AUX_USAGE_CCS_D);
- if (!brw_miptree_alloc_aux(brw, irb->mt)) {
- /* We're out of memory. Fall back to a non-fast clear. */
- can_fast_clear = false;
- }
- }
-
- if (can_fast_clear) {
- const enum isl_aux_state aux_state =
- brw_miptree_get_aux_state(irb->mt, irb->mt_level, irb->mt_layer);
- union isl_color_value clear_color =
- brw_meta_convert_fast_clear_color(brw, irb->mt,
- &ctx->Color.ClearColor);
-
- /* If the buffer is already in ISL_AUX_STATE_CLEAR and the clear color
- * hasn't changed, the clear is redundant and can be skipped.
- */
- if (!brw_miptree_set_clear_color(brw, irb->mt, clear_color) &&
- aux_state == ISL_AUX_STATE_CLEAR) {
- return;
- }
-
- DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
- irb->mt, irb->mt_level, irb->mt_layer, num_layers);
-
- /* We can't setup the blorp_surf until we've allocated the MCS above */
- struct blorp_surf surf;
- blorp_surf_for_miptree(brw, &surf, irb->mt, irb->mt->aux_usage, true,
- &level, irb->mt_layer, num_layers);
-
- /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
- *
- * "Any transition from any value in {Clear, Render, Resolve} to a
- * different value in {Clear, Render, Resolve} requires end of pipe
- * synchronization."
- *
- * In other words, fast clear ops are not properly synchronized with
- * other drawing. We need to use a PIPE_CONTROL to ensure that the
- * contents of the previous draw hit the render target before we resolve
- * and again afterwards to ensure that the resolve is complete before we
- * do any more regular drawing.
- */
- brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_fast_clear(&batch, &surf, isl_format_srgb_to_linear(isl_format),
- ISL_SWIZZLE_IDENTITY,
- level, irb->mt_layer, num_layers, x0, y0, x1, y1);
- blorp_batch_finish(&batch);
-
- brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
- /* Now that the fast clear has occurred, put the buffer in
- * ISL_AUX_STATE_CLEAR so that we won't waste time doing
- * redundant clears.
- */
- brw_miptree_set_aux_state(brw, irb->mt, irb->mt_level,
- irb->mt_layer, num_layers,
- ISL_AUX_STATE_CLEAR);
- } else {
- DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__,
- irb->mt, irb->mt_level, irb->mt_layer, num_layers);
-
- enum isl_aux_usage aux_usage =
- brw_miptree_render_aux_usage(brw, irb->mt, isl_format, false, false);
- brw_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer,
- num_layers, aux_usage);
-
- struct blorp_surf surf;
- blorp_surf_for_miptree(brw, &surf, irb->mt, aux_usage, true,
- &level, irb->mt_layer, num_layers);
-
- union isl_color_value clear_color;
- memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4);
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
- level, irb->mt_layer, num_layers,
- x0, y0, x1, y1,
- clear_color, color_write_disable);
- blorp_batch_finish(&batch);
-
- brw_miptree_finish_render(brw, irb->mt, level, irb->mt_layer,
- num_layers, aux_usage);
- }
-
- return;
-}
-
-void
-brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
- GLbitfield mask, bool partial_clear, bool encode_srgb)
-{
- for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
- struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
- /* Only clear the buffers present in the provided mask */
- if (((1 << fb->_ColorDrawBufferIndexes[buf]) & mask) == 0)
- continue;
-
- /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
- * the framebuffer can be complete with some attachments missing. In
- * this case the _ColorDrawBuffers pointer will be NULL.
- */
- if (rb == NULL)
- continue;
-
- do_single_blorp_clear(brw, fb, rb, buf, partial_clear, encode_srgb);
- irb->need_downsample = true;
- }
-
- return;
-}
-
-void
-brw_blorp_clear_depth_stencil(struct brw_context *brw,
- struct gl_framebuffer *fb,
- GLbitfield mask, bool partial_clear)
-{
- const struct gl_context *ctx = &brw->ctx;
- struct gl_renderbuffer *depth_rb =
- fb->Attachment[BUFFER_DEPTH].Renderbuffer;
- struct gl_renderbuffer *stencil_rb =
- fb->Attachment[BUFFER_STENCIL].Renderbuffer;
-
- if (!depth_rb || ctx->Depth.Mask == GL_FALSE)
- mask &= ~BUFFER_BIT_DEPTH;
-
- if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0)
- mask &= ~BUFFER_BIT_STENCIL;
-
- if (!(mask & (BUFFER_BITS_DEPTH_STENCIL)))
- return;
-
- uint32_t x0, x1, y0, y1, rb_height;
- if (depth_rb) {
- rb_height = depth_rb->Height;
- if (stencil_rb) {
- assert(depth_rb->Width == stencil_rb->Width);
- assert(depth_rb->Height == stencil_rb->Height);
- }
- } else {
- assert(stencil_rb);
- rb_height = stencil_rb->Height;
- }
-
- x0 = fb->_Xmin;
- x1 = fb->_Xmax;
- if (fb->FlipY) {
- y0 = rb_height - fb->_Ymax;
- y1 = rb_height - fb->_Ymin;
- } else {
- y0 = fb->_Ymin;
- y1 = fb->_Ymax;
- }
-
- /* If the clear region is empty, just return. */
- if (x0 == x1 || y0 == y1)
- return;
-
- uint32_t level = 0, start_layer = 0, num_layers;
- struct blorp_surf depth_surf, stencil_surf;
-
- struct brw_mipmap_tree *depth_mt = NULL;
- if (mask & BUFFER_BIT_DEPTH) {
- struct brw_renderbuffer *irb = brw_renderbuffer(depth_rb);
- depth_mt = find_miptree(GL_DEPTH_BUFFER_BIT, irb);
-
- level = irb->mt_level;
- start_layer = irb->mt_layer;
- num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
- brw_miptree_prepare_depth(brw, depth_mt, level, start_layer, num_layers);
-
- unsigned depth_level = level;
- blorp_surf_for_miptree(brw, &depth_surf, depth_mt, depth_mt->aux_usage,
- true, &depth_level, start_layer, num_layers);
- assert(depth_level == level);
- }
-
- uint8_t stencil_mask = 0;
- struct brw_mipmap_tree *stencil_mt = NULL;
- if (mask & BUFFER_BIT_STENCIL) {
- struct brw_renderbuffer *irb = brw_renderbuffer(stencil_rb);
- stencil_mt = find_miptree(GL_STENCIL_BUFFER_BIT, irb);
-
- if (mask & BUFFER_BIT_DEPTH) {
- assert(level == irb->mt_level);
- assert(start_layer == irb->mt_layer);
- assert(num_layers == (fb->MaxNumLayers ? irb->layer_count : 1));
- }
-
- level = irb->mt_level;
- start_layer = irb->mt_layer;
- num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
- stencil_mask = ctx->Stencil.WriteMask[0] & 0xff;
-
- brw_miptree_prepare_access(brw, stencil_mt, level, 1,
- start_layer, num_layers,
- ISL_AUX_USAGE_NONE, false);
-
- unsigned stencil_level = level;
- blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt,
- ISL_AUX_USAGE_NONE, true,
- &stencil_level, start_layer, num_layers);
- }
-
- assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf,
- level, start_layer, num_layers,
- x0, y0, x1, y1,
- (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear,
- stencil_mask, ctx->Stencil.Clear);
- blorp_batch_finish(&batch);
-
- if (mask & BUFFER_BIT_DEPTH) {
- brw_miptree_finish_depth(brw, depth_mt, level,
- start_layer, num_layers, true);
- }
-
- if (stencil_mask) {
- brw_miptree_finish_write(brw, stencil_mt, level,
- start_layer, num_layers,
- ISL_AUX_USAGE_NONE);
- }
-}
-
-void
-brw_blorp_resolve_color(struct brw_context *brw, struct brw_mipmap_tree *mt,
- unsigned level, unsigned layer,
- enum isl_aux_op resolve_op)
-{
- DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);
-
- const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
-
- struct blorp_surf surf;
- blorp_surf_for_miptree(brw, &surf, mt, mt->aux_usage, true,
- &level, layer, 1 /* num_layers */);
-
- /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
- *
- * "Any transition from any value in {Clear, Render, Resolve} to a
- * different value in {Clear, Render, Resolve} requires end of pipe
- * synchronization."
- *
- * In other words, fast clear ops are not properly synchronized with
- * other drawing. We need to use a PIPE_CONTROL to ensure that the
- * contents of the previous draw hit the render target before we resolve
- * and again afterwards to ensure that the resolve is complete before we
- * do any more regular drawing.
- */
- brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_ccs_resolve(&batch, &surf, level, layer, 1,
- brw_blorp_to_isl_format(brw, format, true),
- resolve_op);
- blorp_batch_finish(&batch);
-
- /* See comment above */
- brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-}
-
-void
-brw_blorp_mcs_partial_resolve(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t start_layer, uint32_t num_layers)
-{
- DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
- start_layer, start_layer + num_layers - 1);
-
- assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
-
- const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
- enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);
-
- struct blorp_surf surf;
- uint32_t level = 0;
- blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_MCS, true,
- &level, start_layer, num_layers);
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw, 0);
- blorp_mcs_partial_resolve(&batch, &surf, isl_format,
- start_layer, num_layers);
- blorp_batch_finish(&batch);
-}
-
-/**
- * Perform a HiZ or depth resolve operation.
- *
- * For an overview of HiZ ops, see the following sections of the Sandy Bridge
- * PRM, Volume 1, Part 2:
- * - 7.5.3.1 Depth Buffer Clear
- * - 7.5.3.2 Depth Buffer Resolve
- * - 7.5.3.3 Hierarchical Depth Buffer Resolve
- */
-void
-brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
- unsigned int level, unsigned int start_layer,
- unsigned int num_layers, enum isl_aux_op op)
-{
- assert(brw_miptree_level_has_hiz(mt, level));
- assert(op != ISL_AUX_OP_NONE);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const char *opname = NULL;
-
- switch (op) {
- case ISL_AUX_OP_FULL_RESOLVE:
- opname = "depth resolve";
- break;
- case ISL_AUX_OP_AMBIGUATE:
- opname = "hiz ambiguate";
- break;
- case ISL_AUX_OP_FAST_CLEAR:
- opname = "depth clear";
- break;
- case ISL_AUX_OP_PARTIAL_RESOLVE:
- case ISL_AUX_OP_NONE:
- unreachable("Invalid HiZ op");
- }
-
- DBG("%s %s to mt %p level %d layers %d-%d\n",
- __func__, opname, mt, level, start_layer, start_layer + num_layers - 1);
-
- /* The following stalls and flushes are only documented to be required for
- * HiZ clear operations. However, they also seem to be required for
- * resolve operations.
- */
- if (devinfo->ver == 6) {
- /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
- *
- * "If other rendering operations have preceded this clear, a
- * PIPE_CONTROL with write cache flush enabled and Z-inhibit
- * disabled must be issued before the rectangle primitive used for
- * the depth buffer clear operation.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_CS_STALL);
- } else if (devinfo->ver >= 7) {
- /*
- * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
- *
- * If other rendering operations have preceded this clear, a
- * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
- * enabled must be issued before the rectangle primitive used for
- * the depth buffer clear operation.
- *
- * Same applies for Gfx8 and Gfx9.
- *
- * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
- * PIPE_CONTROL, Depth Cache Flush Enable:
- *
- * This bit must not be set when Depth Stall Enable bit is set in
- * this packet.
- *
- * This is confirmed to hold on real hardware: HSW gets immediate GPU hangs.
- *
- * Therefore issue two pipe control flushes, one for cache flush and
- * another for depth stall.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_CS_STALL);
-
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
- }
-
- assert(mt->aux_usage == ISL_AUX_USAGE_HIZ && mt->aux_buf);
-
- struct blorp_surf surf;
- blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_HIZ, true,
- &level, start_layer, num_layers);
-
- struct blorp_batch batch;
- blorp_batch_init(&brw->blorp, &batch, brw,
- BLORP_BATCH_NO_UPDATE_CLEAR_COLOR);
- blorp_hiz_op(&batch, &surf, level, start_layer, num_layers, op);
- blorp_batch_finish(&batch);
-
- /* The following stalls and flushes are only documented to be required for
- * HiZ clear operations. However, they also seem to be required for
- * resolve operations.
- */
- if (devinfo->ver == 6) {
- /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
- *
- * "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
- * followed by a PIPE_CONTROL command with DEPTH_STALL bit set
- * and Then followed by Depth FLUSH'
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_STALL);
-
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_CS_STALL);
- } else if (devinfo->ver >= 8) {
- /*
- * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
- *
- * "Depth buffer clear pass using any of the methods (WM_STATE,
- * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
- * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
- * "set" before starting to render. DepthStall and DepthFlush are
- * not needed between consecutive depth clear passes nor is it
- * required if the depth clear pass was done with
- * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
- *
- * TODO: As the spec says, this could be conditional.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_STALL);
-
- }
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_BLORP_H
-#define BRW_BLORP_H
-
-#include "blorp/blorp.h"
-#include "brw_mipmap_tree.h"
-#include "program/prog_instruction.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void brw_blorp_init(struct brw_context *brw);
-
-void
-brw_blorp_blit_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_layer,
- mesa_format src_format, int src_swizzle,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_layer,
- mesa_format dst_format,
- float src_x0, float src_y0,
- float src_x1, float src_y1,
- float dst_x0, float dst_y0,
- float dst_x1, float dst_y1,
- GLenum filter, bool mirror_x, bool mirror_y,
- bool decode_srgb, bool encode_srgb);
-
-void
-brw_blorp_copy_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_logical_layer,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_logical_layer,
- unsigned src_x, unsigned src_y,
- unsigned dst_x, unsigned dst_y,
- unsigned src_width, unsigned src_height);
-
-void
-brw_blorp_copy_buffers(struct brw_context *brw,
- struct brw_bo *src_bo,
- unsigned src_offset,
- struct brw_bo *dst_bo,
- unsigned dst_offset,
- unsigned size);
-
-bool
-brw_blorp_upload_miptree(struct brw_context *brw,
- struct brw_mipmap_tree *dst_mt,
- mesa_format dst_format,
- uint32_t level, uint32_t x, uint32_t y, uint32_t z,
- uint32_t width, uint32_t height, uint32_t depth,
- GLenum target, GLenum format, GLenum type,
- const void *pixels,
- const struct gl_pixelstore_attrib *packing);
-
-bool
-brw_blorp_download_miptree(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- mesa_format src_format, uint32_t src_swizzle,
- uint32_t level, uint32_t x, uint32_t y, uint32_t z,
- uint32_t width, uint32_t height, uint32_t depth,
- GLenum target, GLenum format, GLenum type,
- bool y_flip, const void *pixels,
- const struct gl_pixelstore_attrib *packing);
-
-void
-brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
- GLbitfield mask, bool partial_clear, bool encode_srgb);
-void
-brw_blorp_clear_depth_stencil(struct brw_context *brw,
- struct gl_framebuffer *fb,
- GLbitfield mask, bool partial_clear);
-
-void
-brw_blorp_resolve_color(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- unsigned level, unsigned layer,
- enum isl_aux_op resolve_op);
-
-void
-brw_blorp_mcs_partial_resolve(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t start_layer, uint32_t num_layers);
-
-void
-brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
- unsigned int level, unsigned int start_layer,
- unsigned int num_layers, enum isl_aux_op op);
-
-void gfx4_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx45_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx5_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx6_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx7_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx75_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx8_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx9_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-void gfx11_blorp_exec(struct blorp_batch *batch,
- const struct blorp_params *params);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* BRW_BLORP_H */
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * @file brw_buffer_objects.c
- *
- * This provides core GL buffer object functionality.
- */
-
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/streaming-load-memcpy.h"
-#include "main/bufferobj.h"
-#include "x86/common_x86_asm.h"
-#include "util/u_memory.h"
-
-#include "brw_context.h"
-#include "brw_blorp.h"
-#include "brw_buffer_objects.h"
-#include "brw_batch.h"
-
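-/* Track a conservative byte range of the buffer that the GPU may still be
- * using; brw_buffer_subdata() consults it to decide when an unsynchronized
- * CPU write is safe.
- */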
-static void
-mark_buffer_gpu_usage(struct brw_buffer_object *intel_obj,
- uint32_t offset, uint32_t size)
-{
- intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
- intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
-}
-
-static void
-mark_buffer_inactive(struct brw_buffer_object *intel_obj)
-{
- intel_obj->gpu_active_start = ~0;
- intel_obj->gpu_active_end = 0;
-}
-
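-/* Track the byte range of the buffer that holds defined data; a write that
- * lands outside it can go in unsynchronized, and one that covers it entirely
- * can simply replace the whole BO.
- */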
-static void
-mark_buffer_valid_data(struct brw_buffer_object *intel_obj,
- uint32_t offset, uint32_t size)
-{
- intel_obj->valid_data_start = MIN2(intel_obj->valid_data_start, offset);
- intel_obj->valid_data_end = MAX2(intel_obj->valid_data_end, offset + size);
-}
-
-static void
-mark_buffer_invalid(struct brw_buffer_object *intel_obj)
-{
- intel_obj->valid_data_start = ~0;
- intel_obj->valid_data_end = 0;
-}
-
-/** Allocates a new brw_bo to store the data for the buffer object. */
-static void
-alloc_buffer_object(struct brw_context *brw,
- struct brw_buffer_object *intel_obj)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- uint64_t size = intel_obj->Base.Size;
- if (ctx->Const.RobustAccess) {
- /* Pad out buffer objects with an extra 2kB (half a page).
- *
- * When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_*
- * reading out of bounds memory. The application might bind a UBO that's
- * smaller than what the program expects. Ideally, we'd bind an extra
- * push buffer containing zeros, but we have a limited number of those,
- * so it's not always viable. Our only safe option is to pad all buffer
- * objects by the maximum push data length, so that it will never read
- * past the end of a BO.
- *
- * This is unfortunate, but it should result in at most 1 extra page,
- * which probably isn't too terrible.
- */
- size += 64 * 32; /* max read length of 64 256-bit units */
- }
- intel_obj->buffer =
- brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER);
-
- /* the buffer might be bound as a uniform buffer, need to update it
- */
- if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
- if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
- brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
-
- mark_buffer_inactive(intel_obj);
- mark_buffer_invalid(intel_obj);
-}
-
-static void
-release_buffer(struct brw_buffer_object *intel_obj)
-{
- brw_bo_unreference(intel_obj->buffer);
- intel_obj->buffer = NULL;
-}
-
-/**
- * The NewBufferObject() driver hook.
- *
- * Allocates a new brw_buffer_object structure and initializes it.
- *
- * There is some duplication between mesa's bufferobjects and our
- * bufmgr buffers. Both have an integer handle and a hashtable to
- * look up an opaque structure. It would be nice if the handles and
- * internal structure were somehow shared.
- */
-static struct gl_buffer_object *
-brw_new_buffer_object(struct gl_context * ctx, GLuint name)
-{
- struct brw_buffer_object *obj = CALLOC_STRUCT(brw_buffer_object);
- if (!obj) {
- _mesa_error_no_memory(__func__);
- return NULL;
- }
-
- _mesa_initialize_buffer_object(ctx, &obj->Base, name);
-
- obj->buffer = NULL;
-
- return &obj->Base;
-}
-
-/**
- * The DeleteBuffer() driver hook.
- *
- * Deletes a single OpenGL buffer object. Used by glDeleteBuffers().
- */
-static void
-brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj)
-{
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
- assert(intel_obj);
-
- /* Buffer objects are automatically unmapped when deleted, according to
- * the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
- * (though it does if you call glDeleteBuffers)
- */
- _mesa_buffer_unmap_all_mappings(ctx, obj);
-
- brw_bo_unreference(intel_obj->buffer);
- _mesa_delete_buffer_object(ctx, obj);
-}
-
-
-/**
- * The BufferData() driver hook.
- *
- * Implements glBufferData(), which recreates a buffer object's data store
- * and populates it with the given data, if present.
- *
- * Any data that was previously stored in the buffer object is lost.
- *
- * \return true for success, false if out of memory
- */
-static GLboolean
-brw_buffer_data(struct gl_context *ctx,
- GLenum target,
- GLsizeiptrARB size,
- const GLvoid *data,
- GLenum usage,
- GLbitfield storageFlags,
- struct gl_buffer_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
- /* Part of the ABI, but this function doesn't use it.
- */
- (void) target;
-
- intel_obj->Base.Size = size;
- intel_obj->Base.Usage = usage;
- intel_obj->Base.StorageFlags = storageFlags;
-
- assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */
- assert(!obj->Mappings[MAP_INTERNAL].Pointer);
-
- if (intel_obj->buffer != NULL)
- release_buffer(intel_obj);
-
- if (size != 0) {
- alloc_buffer_object(brw, intel_obj);
- if (!intel_obj->buffer)
- return false;
-
- if (data != NULL) {
- brw_bo_subdata(intel_obj->buffer, 0, size, data);
- mark_buffer_valid_data(intel_obj, 0, size);
- }
- }
-
- return true;
-}
-
-static GLboolean
-brw_buffer_data_mem(struct gl_context *ctx,
- GLenum target,
- GLsizeiptrARB size,
- struct gl_memory_object *memObj,
- GLuint64 offset,
- GLenum usage,
- struct gl_buffer_object *bufObj)
-{
- struct brw_buffer_object *intel_obj = brw_buffer_object(bufObj);
- struct brw_memory_object *intel_memObj = brw_memory_object(memObj);
-
- /* Part of the ABI, but this function doesn't use it.
- */
- (void) target;
-
- intel_obj->Base.Size = size;
- intel_obj->Base.Usage = usage;
- intel_obj->Base.StorageFlags = 0;
-
- assert(!bufObj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */
- assert(!bufObj->Mappings[MAP_INTERNAL].Pointer);
-
- if (intel_obj->buffer != NULL)
- release_buffer(intel_obj);
-
- if (size != 0) {
- intel_obj->buffer = intel_memObj->bo;
- mark_buffer_valid_data(intel_obj, offset, size);
- }
-
- return true;
-}
-
-/**
- * The BufferSubData() driver hook.
- *
- * Implements glBufferSubData(), which replaces a portion of the data in a
- * buffer object.
- *
- * If the data range specified by (size + offset) extends beyond the end of
- * the buffer or if data is NULL, no copy is performed.
- */
-static void
-brw_buffer_subdata(struct gl_context *ctx,
- GLintptrARB offset,
- GLsizeiptrARB size,
- const GLvoid *data,
- struct gl_buffer_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
- bool busy;
-
- if (size == 0)
- return;
-
- assert(intel_obj);
-
- /* See if we can do an unsynchronized write of the data into the user's
- * BO. This avoids GPU stalls in unfortunately common usage patterns
- * (uploading sequentially into a BO, with draw calls in between each
- * upload).
- *
- * Once we've hit this path, we mark this GL BO as preferring stalling to
- * blits, so that we can hopefully hit this path again in the future
- * (otherwise, an app that only occasionally stalls would end up blitting
- * all the time, at the cost of bandwidth).
- */
- if (offset + size <= intel_obj->gpu_active_start ||
- intel_obj->gpu_active_end <= offset ||
- offset + size <= intel_obj->valid_data_start ||
- intel_obj->valid_data_end <= offset) {
- void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC);
- memcpy(map + offset, data, size);
- brw_bo_unmap(intel_obj->buffer);
-
- if (intel_obj->gpu_active_end > intel_obj->gpu_active_start)
- intel_obj->prefer_stall_to_blit = true;
-
- mark_buffer_valid_data(intel_obj, offset, size);
- return;
- }
-
- busy =
- brw_bo_busy(intel_obj->buffer) ||
- brw_batch_references(&brw->batch, intel_obj->buffer);
-
- if (busy) {
- if (size == intel_obj->Base.Size ||
- (intel_obj->valid_data_start >= offset &&
- intel_obj->valid_data_end <= offset + size)) {
- /* Replace the current busy bo so the subdata doesn't stall. */
- brw_bo_unreference(intel_obj->buffer);
- alloc_buffer_object(brw, intel_obj);
- } else if (!intel_obj->prefer_stall_to_blit) {
- perf_debug("Using a blit copy to avoid stalling on "
- "glBufferSubData(%ld, %ld) (%ldkb) to a busy "
- "(%d-%d) / valid (%d-%d) buffer object.\n",
- (long)offset, (long)offset + size, (long)(size/1024),
- intel_obj->gpu_active_start,
- intel_obj->gpu_active_end,
- intel_obj->valid_data_start,
- intel_obj->valid_data_end);
- struct brw_bo *temp_bo =
- brw_bo_alloc(brw->bufmgr, "subdata temp", size, BRW_MEMZONE_OTHER);
-
- brw_bo_subdata(temp_bo, 0, size, data);
-
- brw_blorp_copy_buffers(brw,
- temp_bo, 0,
- intel_obj->buffer, offset,
- size);
- brw_emit_mi_flush(brw);
-
- brw_bo_unreference(temp_bo);
- mark_buffer_valid_data(intel_obj, offset, size);
- return;
- } else {
- perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy "
- "(%d-%d) buffer object. Use glMapBufferRange() to "
- "avoid this.\n",
- (long)offset, (long)offset + size, (long)(size/1024),
- intel_obj->gpu_active_start,
- intel_obj->gpu_active_end);
- brw_batch_flush(brw);
- }
- }
-
- brw_bo_subdata(intel_obj->buffer, offset, size, data);
- mark_buffer_inactive(intel_obj);
- mark_buffer_valid_data(intel_obj, offset, size);
-}
-
-/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */
-typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n);
-
-/**
- * The GetBufferSubData() driver hook.
- *
- * Implements glGetBufferSubData(), which copies a subrange of a buffer
- * object into user memory.
- */
-static void
-brw_get_buffer_subdata(struct gl_context *ctx,
- GLintptrARB offset,
- GLsizeiptrARB size,
- GLvoid *data,
- struct gl_buffer_object *obj)
-{
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
- struct brw_context *brw = brw_context(ctx);
-
- assert(intel_obj);
- if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
- brw_batch_flush(brw);
- }
-
- unsigned int map_flags = MAP_READ;
- mem_copy_fn memcpy_fn = memcpy;
-#ifdef USE_SSE41
- if (!intel_obj->buffer->cache_coherent && cpu_has_sse4_1) {
- /* Rather than acquire a new WB mmapping of the buffer object and pull
- * it into the CPU cache, keep using the WC mmap that we have for writes,
- * and use streaming (movntdqa) loads instead.
- */
- map_flags |= MAP_COHERENT;
- memcpy_fn = (mem_copy_fn) _mesa_streaming_load_memcpy;
- }
-#endif
-
- void *map = brw_bo_map(brw, intel_obj->buffer, map_flags);
- if (unlikely(!map)) {
- _mesa_error_no_memory(__func__);
- return;
- }
- memcpy_fn(data, map + offset, size);
- brw_bo_unmap(intel_obj->buffer);
-
- mark_buffer_inactive(intel_obj);
-}
-
-
-/**
- * The MapBufferRange() driver hook.
- *
- * This implements both glMapBufferRange() and glMapBuffer().
- *
- * The goal of this extension is to allow apps to accumulate their rendering
- * at the same time as they accumulate their buffer object. Without it,
- * you'd end up blocking on execution of rendering every time you mapped
- * the buffer to put new data in.
- *
- * We support it in 3 ways: If unsynchronized, then don't bother
- * flushing the batchbuffer before mapping the buffer, which can save blocking
- * in many cases. If we would still block, and they allow the whole buffer
- * to be invalidated, then just allocate a new buffer to replace the old one.
- * If not, and we'd block, and they allow the subrange of the buffer to be
- * invalidated, then we can make a new little BO, let them write into that,
- * and blit it into the real BO at unmap time.
- */
-static void *
-brw_map_buffer_range(struct gl_context *ctx,
- GLintptr offset, GLsizeiptr length,
- GLbitfield access, struct gl_buffer_object *obj,
- gl_map_buffer_index index)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
- assert(intel_obj);
-
- STATIC_ASSERT(GL_MAP_UNSYNCHRONIZED_BIT == MAP_ASYNC);
- STATIC_ASSERT(GL_MAP_WRITE_BIT == MAP_WRITE);
- STATIC_ASSERT(GL_MAP_READ_BIT == MAP_READ);
- STATIC_ASSERT(GL_MAP_PERSISTENT_BIT == MAP_PERSISTENT);
- STATIC_ASSERT(GL_MAP_COHERENT_BIT == MAP_COHERENT);
- assert((access & MAP_INTERNAL_MASK) == 0);
-
- /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
- * internally uses our functions directly.
- */
- obj->Mappings[index].Offset = offset;
- obj->Mappings[index].Length = length;
- obj->Mappings[index].AccessFlags = access;
-
- if (intel_obj->buffer == NULL) {
- obj->Mappings[index].Pointer = NULL;
- return NULL;
- }
-
- /* If the access is synchronized (like a normal buffer mapping), then get
- * things flushed out so the later mapping syncs appropriately through GEM.
- * If the user doesn't care about existing buffer contents and mapping would
- * cause us to block, then throw out the old buffer.
- *
- * If they set INVALIDATE_BUFFER, we can pitch the current contents to
- * achieve the required synchronization.
- */
- if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
- if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
- if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
- brw_bo_unreference(intel_obj->buffer);
- alloc_buffer_object(brw, intel_obj);
- } else {
- perf_debug("Stalling on the GPU for mapping a busy buffer "
- "object\n");
- brw_batch_flush(brw);
- }
- } else if (brw_bo_busy(intel_obj->buffer) &&
- (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
- brw_bo_unreference(intel_obj->buffer);
- alloc_buffer_object(brw, intel_obj);
- }
- }
-
- if (access & MAP_WRITE)
- mark_buffer_valid_data(intel_obj, offset, length);
-
- /* If the user is mapping a range of an active buffer object but
- * doesn't require the current contents of that range, make a new
- * BO, and we'll copy what they put in there out at unmap or
- * FlushRange time.
- *
- * That is, unless they're looking for a persistent mapping -- we would
- * need to do blits in the MemoryBarrier call, and it's easier to just do a
- * GPU stall and do a mapping.
- */
- if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) &&
- (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
- brw_bo_busy(intel_obj->buffer)) {
- /* Ensure that the base alignment of the allocation meets the alignment
- * guarantees the driver has advertised to the application.
- */
- const unsigned alignment = ctx->Const.MinMapBufferAlignment;
-
- intel_obj->map_extra[index] = (uintptr_t) offset % alignment;
- intel_obj->range_map_bo[index] =
- brw_bo_alloc(brw->bufmgr, "BO blit temp",
- length + intel_obj->map_extra[index],
- BRW_MEMZONE_OTHER);
- void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access);
- obj->Mappings[index].Pointer = map + intel_obj->map_extra[index];
- return obj->Mappings[index].Pointer;
- }
-
- void *map = brw_bo_map(brw, intel_obj->buffer, access);
- if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
- mark_buffer_inactive(intel_obj);
- }
-
- obj->Mappings[index].Pointer = map + offset;
- return obj->Mappings[index].Pointer;
-}
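A minimal sketch of the caller-side pattern that reaches the temporary-BO branch above (function and parameter names are hypothetical, not from the driver): the mapped range is busy on the GPU, the caller discards its previous contents via GL_MAP_INVALIDATE_RANGE_BIT, and asks for neither an unsynchronized nor a persistent mapping, so the returned pointer aliases a scratch BO that gets blitted into place at unmap time.

#include <epoxy/gl.h>
#include <string.h>

static void
replace_busy_range(GLuint vbo, GLintptr offset, GLsizeiptr length,
                   const void *new_data)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);
   void *ptr = glMapBufferRange(GL_ARRAY_BUFFER, offset, length,
                                GL_MAP_WRITE_BIT |
                                GL_MAP_INVALIDATE_RANGE_BIT);
   if (ptr) {
      memcpy(ptr, new_data, length);
      /* The blit from the scratch BO into the real one happens at unmap. */
      glUnmapBuffer(GL_ARRAY_BUFFER);
   }
}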
-
-/**
- * The FlushMappedBufferRange() driver hook.
- *
- * Implements glFlushMappedBufferRange(), which signifies that modifications
- * have been made to a range of a mapped buffer, and it should be flushed.
- *
- * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
- *
- * Ideally we'd use a BO to avoid taking up cache space for the temporary
- * data, but FlushMappedBufferRange may be followed by further writes to
- * the pointer, so we would have to re-map after emitting our blit, which
- * would defeat the point.
- */
-static void
-brw_flush_mapped_buffer_range(struct gl_context *ctx,
- GLintptr offset, GLsizeiptr length,
- struct gl_buffer_object *obj,
- gl_map_buffer_index index)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
- assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT);
-
- /* If we gave a direct mapping of the buffer instead of using a temporary,
- * then there's nothing to do.
- */
- if (intel_obj->range_map_bo[index] == NULL)
- return;
-
- if (length == 0)
- return;
-
- /* Note that we're not unmapping our buffer while executing the blit. We
- * need to have a mapping still at the end of this call, since the user
- * gets to make further modifications and glFlushMappedBufferRange() calls.
- * This is safe, because:
- *
- * - On LLC platforms, we're using a CPU mapping that's coherent with the
- * GPU (except for the render caches), so the kernel doesn't need to do
- * any flushing work for us except for what happens at batch exec time
- * anyway.
- *
- * - On non-LLC platforms, we're using a GTT mapping that writes directly
- * to system memory (except for the chipset cache that gets flushed at
- * batch exec time).
- *
- * In both cases we don't need to stall for the previous blit to complete
- * so we can re-map (and we definitely don't want to, since that would be
- * slow): If the user edits a part of their buffer that's previously been
- * blitted, then our lack of synchronization is fine, because either
- * they'll get some too-new data in the first blit and not do another blit
- * of that area (but in that case the results are undefined), or they'll do
- * another blit of that area and the complete newer data will land the
- * second time.
- */
- brw_blorp_copy_buffers(brw,
- intel_obj->range_map_bo[index],
- intel_obj->map_extra[index] + offset,
- intel_obj->buffer,
- obj->Mappings[index].Offset + offset,
- length);
- mark_buffer_gpu_usage(intel_obj,
- obj->Mappings[index].Offset + offset,
- length);
- brw_emit_mi_flush(brw);
-}
-
-
-/**
- * The UnmapBuffer() driver hook.
- *
- * Implements glUnmapBuffer().
- */
-static GLboolean
-brw_unmap_buffer(struct gl_context *ctx,
- struct gl_buffer_object *obj,
- gl_map_buffer_index index)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
- assert(intel_obj);
- assert(obj->Mappings[index].Pointer);
- if (intel_obj->range_map_bo[index] != NULL) {
- brw_bo_unmap(intel_obj->range_map_bo[index]);
-
- if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) {
- brw_blorp_copy_buffers(brw,
- intel_obj->range_map_bo[index],
- intel_obj->map_extra[index],
- intel_obj->buffer, obj->Mappings[index].Offset,
- obj->Mappings[index].Length);
- mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset,
- obj->Mappings[index].Length);
- brw_emit_mi_flush(brw);
- }
-
- /* Since we've emitted some blits to buffers that will (likely) be used
- * in rendering operations in other cache domains in this batch, emit a
- * flush. Once again, we wish for a domain tracker in libdrm to cover
- * usage inside of a batchbuffer.
- */
-
- brw_bo_unreference(intel_obj->range_map_bo[index]);
- intel_obj->range_map_bo[index] = NULL;
- } else if (intel_obj->buffer != NULL) {
- brw_bo_unmap(intel_obj->buffer);
- }
- obj->Mappings[index].Pointer = NULL;
- obj->Mappings[index].Offset = 0;
- obj->Mappings[index].Length = 0;
-
- return true;
-}
-
-/**
- * Gets a pointer to the object's BO, and marks the given range as being used
- * on the GPU.
- *
- * Anywhere that uses buffer objects in the pipeline should be using this to
- * mark the range of the buffer that is being accessed by the pipeline.
- */
-struct brw_bo *
-brw_bufferobj_buffer(struct brw_context *brw,
- struct brw_buffer_object *intel_obj,
- uint32_t offset, uint32_t size, bool write)
-{
- /* This is needed so that things like transform feedback and texture buffer
- * objects that need a BO but don't want to check that they exist for
- * draw-time validation can just always get a BO from a GL buffer object.
- */
- if (intel_obj->buffer == NULL)
- alloc_buffer_object(brw, intel_obj);
-
- mark_buffer_gpu_usage(intel_obj, offset, size);
-
- /* If writing, (conservatively) mark this section as having valid data. */
- if (write)
- mark_buffer_valid_data(intel_obj, offset, size);
-
- return intel_obj->buffer;
-}
-
-/**
- * The CopyBufferSubData() driver hook.
- *
- * Implements glCopyBufferSubData(), which copies a portion of one buffer
- * object's data to another. Independent source and destination offsets
- * are allowed.
- */
-static void
-brw_copy_buffer_subdata(struct gl_context *ctx,
- struct gl_buffer_object *src,
- struct gl_buffer_object *dst,
- GLintptr read_offset, GLintptr write_offset,
- GLsizeiptr size)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *intel_src = brw_buffer_object(src);
- struct brw_buffer_object *intel_dst = brw_buffer_object(dst);
- struct brw_bo *src_bo, *dst_bo;
-
- if (size == 0)
- return;
-
- dst_bo = brw_bufferobj_buffer(brw, intel_dst, write_offset, size, true);
- src_bo = brw_bufferobj_buffer(brw, intel_src, read_offset, size, false);
-
- brw_blorp_copy_buffers(brw,
- src_bo, read_offset,
- dst_bo, write_offset, size);
-
- /* Since we've emitted some blits to buffers that will (likely) be used
- * in rendering operations in other cache domains in this batch, emit a
- * flush. Once again, we wish for a domain tracker in libdrm to cover
- * usage inside of a batchbuffer.
- */
- brw_emit_mi_flush(brw);
-}
-
-void
-brw_init_buffer_object_functions(struct dd_function_table *functions)
-{
- functions->NewBufferObject = brw_new_buffer_object;
- functions->DeleteBuffer = brw_delete_buffer;
- functions->BufferData = brw_buffer_data;
- functions->BufferDataMem = brw_buffer_data_mem;
- functions->BufferSubData = brw_buffer_subdata;
- functions->GetBufferSubData = brw_get_buffer_subdata;
- functions->MapBufferRange = brw_map_buffer_range;
- functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range;
- functions->UnmapBuffer = brw_unmap_buffer;
- functions->CopyBufferSubData = brw_copy_buffer_subdata;
-}
+++ /dev/null
-/*
- * Copyright 2005 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_BUFFEROBJ_H
-#define BRW_BUFFEROBJ_H
-
-#include "main/mtypes.h"
-
-struct brw_context;
-struct gl_buffer_object;
-
-
-/**
- * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
- */
-struct brw_buffer_object
-{
- struct gl_buffer_object Base;
- struct brw_bo *buffer; /* the low-level buffer manager's buffer handle */
-
- struct brw_bo *range_map_bo[MAP_COUNT];
-
- /**
- * Alignment offset from the range_map_bo temporary mapping to the returned
- * obj->Pointer (caused by GL_ARB_map_buffer_alignment).
- */
- unsigned map_extra[MAP_COUNT];
-
- /** @{
- * Tracking for what range of the BO may currently be in use by the GPU.
- *
- * Users often want to either glBufferSubData() or glMapBufferRange() a
- * buffer object where some subset of it is busy on the GPU, without either
- * stalling or doing an extra blit (since our blits are extra expensive,
- * given that we have to reupload most of the 3D state when switching
- * rings). We wish they'd just use glMapBufferRange() with the
- * UNSYNC|INVALIDATE_RANGE flag or the INVALIDATE_BUFFER flag, but lots
- * don't.
- *
- * To work around apps, we track what range of the BO we might have used on
- * the GPU as vertex data, transform feedback output, buffer textures, etc.,
- * and just do glBufferSubData() with an unsynchronized map when they're
- * outside of that range.
- *
- * If gpu_active_start > gpu_active_end, then the GPU is not currently
- * accessing the BO (and we can map it without synchronization).
- */
- uint32_t gpu_active_start;
- uint32_t gpu_active_end;
-
- /** @{
- * Tracking for what range of the BO may contain valid data.
- *
- * Users may create a large buffer object and only fill part of it
- * with valid data. This is a conservative estimate of what part
- * of the buffer contains valid data that we have to preserve.
- */
- uint32_t valid_data_start;
- uint32_t valid_data_end;
- /** @} */
-
- /**
- * If we've avoided stalls/blits using the active tracking, flag the buffer
- * for (occasional) stalling in the future to avoid getting stuck in a
- * cycle of blitting on buffer wraparound.
- */
- bool prefer_stall_to_blit;
- /** @} */
-};
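For reference, a minimal sketch (an assumption, not necessarily the driver's own code) of how the mark_buffer_gpu_usage()/mark_buffer_inactive() helpers used by brw_bufferobj.c could maintain these fields, assuming Mesa's MIN2/MAX2 macros; the key property is the one documented above, namely that gpu_active_start > gpu_active_end encodes "nothing in use":

static inline void
mark_buffer_gpu_usage(struct brw_buffer_object *intel_obj,
                      uint32_t offset, uint32_t size)
{
   /* Grow the conservative "in use by the GPU" interval. */
   intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
   intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
}

static inline void
mark_buffer_inactive(struct brw_buffer_object *intel_obj)
{
   /* start > end means the GPU is not using any part of the BO. */
   intel_obj->gpu_active_start = ~0u;
   intel_obj->gpu_active_end = 0;
}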
-
-
-/* Get the bm buffer associated with a GL bufferobject:
- */
-struct brw_bo *brw_bufferobj_buffer(struct brw_context *brw,
- struct brw_buffer_object *obj,
- uint32_t offset,
- uint32_t size,
- bool write);
-
-void brw_upload_data(struct brw_uploader *upload,
- const void *data,
- uint32_t size,
- uint32_t alignment,
- struct brw_bo **out_bo,
- uint32_t *out_offset);
-
-void *brw_upload_space(struct brw_uploader *upload,
- uint32_t size,
- uint32_t alignment,
- struct brw_bo **out_bo,
- uint32_t *out_offset);
-
-void brw_upload_finish(struct brw_uploader *upload);
-void brw_upload_init(struct brw_uploader *upload,
- struct brw_bufmgr *bufmgr,
- unsigned default_size);
-
-/* Hook the bufferobject implementation into mesa:
- */
-void brw_init_buffer_object_functions(struct dd_function_table *functions);
-
-static inline struct brw_buffer_object *
-brw_buffer_object(struct gl_buffer_object *obj)
-{
- return (struct brw_buffer_object *) obj;
-}
-
-struct brw_memory_object {
- struct gl_memory_object Base;
- struct brw_bo *bo;
-};
-
-static inline struct brw_memory_object *
-brw_memory_object(struct gl_memory_object *obj)
-{
- return (struct brw_memory_object *)obj;
-}
-
-#endif
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/renderbuffer.h"
-
-static void
-brw_drawbuffer(struct gl_context *ctx)
-{
- if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
- struct brw_context *const brw = brw_context(ctx);
-
- /* If we might be front-buffer rendering on this buffer for the first
- * time, invalidate our DRI drawable so we'll ask for new buffers
- * (including the fake front) before we start rendering again.
- */
- if (brw->driContext->driDrawablePriv)
- dri2InvalidateDrawable(brw->driContext->driDrawablePriv);
- brw_prepare_render(brw);
- }
-}
-
-
-static void
-brw_readbuffer(struct gl_context * ctx, GLenum mode)
-{
- if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
- struct brw_context *const brw = brw_context(ctx);
-
- /* If we might be front-buffer reading on this buffer for the first
- * time, invalidate our DRI drawable so we'll ask for new buffers
- * (including the fake front) before we start reading again.
- */
- if (brw->driContext->driReadablePriv)
- dri2InvalidateDrawable(brw->driContext->driReadablePriv);
- brw_prepare_render(brw);
- }
-}
-
-
-void
-brw_init_buffer_functions(struct dd_function_table *functions)
-{
- functions->DrawBuffer = brw_drawbuffer;
- functions->ReadBuffer = brw_readbuffer;
-}
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_BUFFERS_H
-#define BRW_BUFFERS_H
-
-#include "dri_util.h"
-#include "drm-uapi/drm.h"
-#include "brw_context.h"
-
-extern void brw_init_buffer_functions(struct dd_function_table *functions);
-
-#endif /* BRW_BUFFERS_H */
+++ /dev/null
-/*
- * Copyright © 2007 Red Hat Inc.
- * Copyright © 2007-2017 Intel Corporation
- * Copyright © 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/*
- * Authors: Thomas Hellström <thellstrom@vmware.com>
- * Keith Whitwell <keithw@vmware.com>
- * Eric Anholt <eric@anholt.net>
- * Dave Airlie <airlied@linux.ie>
- */
-
-#include <xf86drm.h>
-#include <util/u_atomic.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <assert.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <stdbool.h>
-
-#include "errno.h"
-#include "common/intel_clflush.h"
-#include "dev/intel_debug.h"
-#include "common/intel_gem.h"
-#include "dev/intel_device_info.h"
-#include "libdrm_macros.h"
-#include "main/macros.h"
-#include "util/macros.h"
-#include "util/hash_table.h"
-#include "util/list.h"
-#include "util/os_file.h"
-#include "util/u_dynarray.h"
-#include "util/vma.h"
-#include "brw_bufmgr.h"
-#include "brw_context.h"
-#include "string.h"
-
-#include "drm-uapi/i915_drm.h"
-
-#ifdef HAVE_VALGRIND
-#include <valgrind.h>
-#include <memcheck.h>
-#define VG(x) x
-#else
-#define VG(x)
-#endif
-
-/* Bufmgr is not aware of brw_context. */
-#undef WARN_ONCE
-#define WARN_ONCE(cond, fmt...) do { \
- if (unlikely(cond)) { \
- static bool _warned = false; \
- if (!_warned) { \
- fprintf(stderr, "WARNING: "); \
- fprintf(stderr, fmt); \
- _warned = true; \
- } \
- } \
-} while (0)
-
-
-/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
- * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
- * leaked. All because it does not call VG(cli_free) from its
- * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
- * an allocation, we mark it available for use upon mmapping and remove
- * it upon unmapping.
- */
-#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
-#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
-
-/* On FreeBSD PAGE_SIZE is already defined in
- * /usr/include/machine/param.h that is indirectly
- * included here.
- */
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
-
-#define FILE_DEBUG_FLAG DEBUG_BUFMGR
-
-static inline int
-atomic_add_unless(int *v, int add, int unless)
-{
- int c, old;
- c = p_atomic_read(v);
- while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
- c = old;
- return c == unless;
-}
-
-/**
- * i965 fixed-size bucketing VMA allocator.
- *
- * The BO cache maintains "cache buckets" for buffers of various sizes.
- * All buffers in a given bucket are identically sized - when allocating,
- * we always round up to the bucket size. This means that virtually all
- * allocations are fixed-size; only buffers which are too large to fit in
- * a bucket can be variably-sized.
- *
- * We create an allocator for each bucket. Each contains a free-list, where
- * each node contains a <starting address, 64-bit bitmap> pair. Each bit
- * represents a bucket-sized block of memory. (At the first level, each
- * bit corresponds to a page. For the second bucket, bits correspond to
- * two pages, and so on.) 1 means a block is free, and 0 means it's in-use.
- * The lowest bit in the bitmap is for the first block.
- *
- * This makes allocations cheap - any bit of any node will do. We can pick
- * the head of the list and use ffs() to find a free block. If there are
- * none, we allocate 64 blocks from a larger allocator - either a bigger
- * bucketing allocator, or a fallback top-level allocator for large objects.
- */
-struct vma_bucket_node {
- uint64_t start_address;
- uint64_t bitmap;
-};
-
-struct bo_cache_bucket {
- /** List of cached BOs. */
- struct list_head head;
-
- /** Size of this bucket, in bytes. */
- uint64_t size;
-
- /** List of vma_bucket_nodes. */
- struct util_dynarray vma_list[BRW_MEMZONE_COUNT];
-};
-
-struct bo_export {
- /** File descriptor associated with a handle export. */
- int drm_fd;
-
- /** GEM handle in drm_fd */
- uint32_t gem_handle;
-
- struct list_head link;
-};
-
-struct brw_bufmgr {
- uint32_t refcount;
-
- struct list_head link;
-
- int fd;
-
- mtx_t lock;
-
- /** Array of lists of cached gem objects of power-of-two sizes */
- struct bo_cache_bucket cache_bucket[14 * 4];
- int num_buckets;
- time_t time;
-
- struct hash_table *name_table;
- struct hash_table *handle_table;
-
- struct util_vma_heap vma_allocator[BRW_MEMZONE_COUNT];
-
- bool has_llc:1;
- bool has_mmap_wc:1;
- bool has_mmap_offset:1;
- bool bo_reuse:1;
-
- uint64_t initial_kflags;
-};
-
-static mtx_t global_bufmgr_list_mutex = _MTX_INITIALIZER_NP;
-static struct list_head global_bufmgr_list = {
- .next = &global_bufmgr_list,
- .prev = &global_bufmgr_list,
-};
-
-static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
- uint32_t stride);
-
-static void bo_free(struct brw_bo *bo);
-
-static uint64_t vma_alloc(struct brw_bufmgr *bufmgr,
- enum brw_memory_zone memzone,
- uint64_t size, uint64_t alignment);
-
-static struct brw_bo *
-hash_find_bo(struct hash_table *ht, unsigned int key)
-{
- struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
- return entry ? (struct brw_bo *) entry->data : NULL;
-}
-
-static uint64_t
-bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling)
-{
- if (tiling == I915_TILING_NONE)
- return size;
-
- /* 965+ just need multiples of page size for tiling */
- return ALIGN(size, PAGE_SIZE);
-}
-
-/*
- * Round a given pitch up to the minimum required for X tiling on a
- * given chip. We use 512 as the minimum to allow for a later tiling
- * change.
- */
-static uint32_t
-bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
-{
- unsigned long tile_width;
-
- /* If untiled, then just align it so that we can do rendering
- * to it with the 3D engine.
- */
- if (tiling == I915_TILING_NONE)
- return ALIGN(pitch, 64);
-
- if (tiling == I915_TILING_X)
- tile_width = 512;
- else
- tile_width = 128;
-
- /* 965 is flexible */
- return ALIGN(pitch, tile_width);
-}
-
-/**
- * This function finds the correct bucket fit for the input size.
- * The function works with O(1) complexity when the requested size
- * was queried instead of iterating the size through all the buckets.
- */
-static struct bo_cache_bucket *
-bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
-{
- /* Calculating the pages and rounding up to the page size. */
- const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-
- /* Row   Bucket sizes      clz((x-1) | 3)    Row     Column
- *        in pages                            stride   size
- *   0:    1  2  3  4  ->  30 30 30 30          4        1
- *   1:    5  6  7  8  ->  29 29 29 29          4        1
- *   2:   10 12 14 16  ->  28 28 28 28          8        2
- *   3:   20 24 28 32  ->  27 27 27 27         16        4
- */
- const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
- const unsigned row_max_pages = 4 << row;
-
- /* The '& ~2' is the special case for row 0 (the first row in the table
- * above). There, max pages / 2 is 2, but the previous row maximum is zero
- * (because there is no previous row). All row maximum sizes are powers of
- * 2, so that is the only case where that bit will be set.
- */
- const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
- int col_size_log2 = row - 1;
- col_size_log2 += (col_size_log2 < 0);
-
- const unsigned col = (pages - prev_row_max_pages +
- ((1 << col_size_log2) - 1)) >> col_size_log2;
-
- /* Calculating the index based on the row and column. */
- const unsigned index = (row * 4) + (col - 1);
-
- return (index < bufmgr->num_buckets) ?
- &bufmgr->cache_bucket[index] : NULL;
-}
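A hand-worked trace of the arithmetic above for a hypothetical 40 KiB request (the numbers are worked by hand against the table, not taken from the file):

/* size = 40960:
 *   pages              = (40960 + 4095) / 4096              = 10
 *   row                = 30 - clz((10 - 1) | 3) = 30 - clz(11) = 2
 *   row_max_pages      = 4 << 2                              = 16
 *   prev_row_max_pages = (16 / 2) & ~2                       = 8
 *   col_size_log2      = 2 - 1                               = 1
 *   col                = (10 - 8 + 1) >> 1                   = 1
 *   index              = 2 * 4 + (1 - 1)                     = 8
 *
 * Index 8 is the first bucket of row 2 in the table, i.e. the 10-page
 * (40 KiB) bucket, so this request is not rounded up at all.
 */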
-
-static enum brw_memory_zone
-memzone_for_address(uint64_t address)
-{
- const uint64_t _4GB = 1ull << 32;
-
- if (address >= _4GB)
- return BRW_MEMZONE_OTHER;
-
- return BRW_MEMZONE_LOW_4G;
-}
-
-static uint64_t
-bucket_vma_alloc(struct brw_bufmgr *bufmgr,
- struct bo_cache_bucket *bucket,
- enum brw_memory_zone memzone)
-{
- struct util_dynarray *vma_list = &bucket->vma_list[memzone];
- struct vma_bucket_node *node;
-
- if (vma_list->size == 0) {
- /* This bucket allocator is out of space - allocate a new block of
- * memory for 64 blocks from a larger allocator (either a larger
- * bucket or util_vma).
- *
- * We align the address to the node size (64 blocks) so that
- * bucket_vma_free can easily compute the starting address of this
- * block by rounding any address we return down to the node size.
- *
- * Set the first bit used, and return the start address.
- */
- uint64_t node_size = 64ull * bucket->size;
- node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1);
-
- if (unlikely(!node))
- return 0ull;
-
- uint64_t addr = vma_alloc(bufmgr, memzone, node_size, node_size);
- node->start_address = intel_48b_address(addr);
- node->bitmap = ~1ull;
- return node->start_address;
- }
-
- /* Pick any bit from any node - they're all the right size and free. */
- node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node);
- int bit = ffsll(node->bitmap) - 1;
- assert(bit >= 0 && bit <= 63);
-
- /* Reserve the memory by clearing the bit. */
- assert((node->bitmap & (1ull << bit)) != 0ull);
- node->bitmap &= ~(1ull << bit);
-
- uint64_t addr = node->start_address + bit * bucket->size;
-
- /* If this node is now completely full, remove it from the free list. */
- if (node->bitmap == 0ull) {
- (void) util_dynarray_pop(vma_list, struct vma_bucket_node);
- }
-
- return addr;
-}
-
-static void
-bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address)
-{
- enum brw_memory_zone memzone = memzone_for_address(address);
- struct util_dynarray *vma_list = &bucket->vma_list[memzone];
- const uint64_t node_bytes = 64ull * bucket->size;
- struct vma_bucket_node *node = NULL;
-
- /* bucket_vma_alloc allocates 64 blocks at a time, and aligns it to
- * that 64 block size. So, we can round down to get the starting address.
- */
- uint64_t start = (address / node_bytes) * node_bytes;
-
- /* Dividing the offset from start by bucket size gives us the bit index. */
- int bit = (address - start) / bucket->size;
-
- assert(start + bit * bucket->size == address);
-
- util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) {
- if (cur->start_address == start) {
- node = cur;
- break;
- }
- }
-
- if (!node) {
- /* No node - the whole group of 64 blocks must have been in-use. */
- node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1);
-
- if (unlikely(!node))
- return; /* bogus, leaks some GPU VMA, but nothing we can do... */
-
- node->start_address = start;
- node->bitmap = 0ull;
- }
-
- /* Set the bit to return the memory. */
- assert((node->bitmap & (1ull << bit)) == 0ull);
- node->bitmap |= 1ull << bit;
-
- /* The block might be entirely free now, and if so, we could return it
- * to the larger allocator. But we may as well hang on to it, in case
- * we get more allocations at this block size.
- */
-}
-
-static struct bo_cache_bucket *
-get_bucket_allocator(struct brw_bufmgr *bufmgr, uint64_t size)
-{
- /* Skip using the bucket allocator for very large sizes, as it allocates
- * 64 of them and this can balloon rather quickly.
- */
- if (size > 1024 * PAGE_SIZE)
- return NULL;
-
- struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size);
-
- if (bucket && bucket->size == size)
- return bucket;
-
- return NULL;
-}
-
-/**
- * Allocate a section of virtual memory for a buffer, assigning an address.
- *
- * This uses either the bucket allocator for the given size, or the large
- * object allocator (util_vma).
- */
-static uint64_t
-vma_alloc(struct brw_bufmgr *bufmgr,
- enum brw_memory_zone memzone,
- uint64_t size,
- uint64_t alignment)
-{
- /* Without softpin support, we let the kernel assign addresses. */
- assert(brw_using_softpin(bufmgr));
-
- alignment = ALIGN(alignment, PAGE_SIZE);
-
- struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
- uint64_t addr;
-
- if (bucket) {
- addr = bucket_vma_alloc(bufmgr, bucket, memzone);
- } else {
- addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size,
- alignment);
- }
-
- assert((addr >> 48ull) == 0);
- assert((addr % alignment) == 0);
-
- return intel_canonical_address(addr);
-}
-
-/**
- * Free a virtual memory area, allowing the address to be reused.
- */
-static void
-vma_free(struct brw_bufmgr *bufmgr,
- uint64_t address,
- uint64_t size)
-{
- assert(brw_using_softpin(bufmgr));
-
- /* Un-canonicalize the address. */
- address = intel_48b_address(address);
-
- if (address == 0ull)
- return;
-
- struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
-
- if (bucket) {
- bucket_vma_free(bucket, address);
- } else {
- enum brw_memory_zone memzone = memzone_for_address(address);
- util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
- }
-}
-
-int
-brw_bo_busy(struct brw_bo *bo)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
- struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };
-
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
- if (ret == 0) {
- bo->idle = !busy.busy;
- return busy.busy;
- }
- return false;
-}
-
-int
-brw_bo_madvise(struct brw_bo *bo, int state)
-{
- struct drm_i915_gem_madvise madv = {
- .handle = bo->gem_handle,
- .madv = state,
- .retained = 1,
- };
-
- drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
-
- return madv.retained;
-}
-
-/* drop the oldest entries that have been purged by the kernel */
-static void
-brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
- struct bo_cache_bucket *bucket)
-{
- list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
- if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
- break;
-
- list_del(&bo->head);
- bo_free(bo);
- }
-}
-
-static struct brw_bo *
-bo_calloc(void)
-{
- struct brw_bo *bo = calloc(1, sizeof(*bo));
- if (!bo)
- return NULL;
-
- list_inithead(&bo->exports);
-
- return bo;
-}
-
-static struct brw_bo *
-bo_alloc_internal(struct brw_bufmgr *bufmgr,
- const char *name,
- uint64_t size,
- enum brw_memory_zone memzone,
- unsigned flags,
- uint32_t tiling_mode,
- uint32_t stride)
-{
- struct brw_bo *bo;
- int ret;
- struct bo_cache_bucket *bucket;
- bool alloc_from_cache;
- uint64_t bo_size;
- bool busy = false;
- bool zeroed = false;
-
- if (flags & BO_ALLOC_BUSY)
- busy = true;
-
- if (flags & BO_ALLOC_ZEROED)
- zeroed = true;
-
- /* BUSY doesn't really jibe with ZEROED, as we have to wait for it to
- * be idle before we can memset. Just disallow that combination.
- */
- assert(!(busy && zeroed));
-
- /* Round the allocated size up to a power of two number of pages. */
- bucket = bucket_for_size(bufmgr, size);
-
- /* If we don't have caching at this size, don't actually round the
- * allocation up.
- */
- if (bucket == NULL) {
- unsigned int page_size = getpagesize();
- bo_size = size == 0 ? page_size : ALIGN(size, page_size);
- } else {
- bo_size = bucket->size;
- }
- assert(bo_size);
-
- mtx_lock(&bufmgr->lock);
- /* Get a buffer out of the cache if available */
-retry:
- alloc_from_cache = false;
- if (bucket != NULL && !list_is_empty(&bucket->head)) {
- if (busy && !zeroed) {
- /* Allocate new render-target BOs from the tail (MRU)
- * of the list, as it will likely be hot in the GPU
- * cache and in the aperture for us. If the caller
- * asked us to zero the buffer, we don't want this
- * because we are going to mmap it.
- */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
- list_del(&bo->head);
- alloc_from_cache = true;
- } else {
- /* For non-render-target BOs (where we're probably
- * going to map it first thing in order to fill it
- * with data), check if the last BO in the cache is
- * unbusy, and only reuse in that case. Otherwise,
- * allocating a new buffer is probably faster than
- * waiting for the GPU to finish.
- */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
- if (!brw_bo_busy(bo)) {
- alloc_from_cache = true;
- list_del(&bo->head);
- }
- }
-
- if (alloc_from_cache) {
- assert(list_is_empty(&bo->exports));
- if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
- bo_free(bo);
- brw_bo_cache_purge_bucket(bufmgr, bucket);
- goto retry;
- }
-
- if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
- bo_free(bo);
- goto retry;
- }
-
- if (zeroed) {
- void *map = brw_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
- if (!map) {
- bo_free(bo);
- goto retry;
- }
- memset(map, 0, bo_size);
- }
- }
- }
-
- if (alloc_from_cache) {
- /* If the cache BO isn't in the right memory zone, free the old
- * memory and assign it a new address.
- */
- if ((bo->kflags & EXEC_OBJECT_PINNED) &&
- memzone != memzone_for_address(bo->gtt_offset)) {
- vma_free(bufmgr, bo->gtt_offset, bo->size);
- bo->gtt_offset = 0ull;
- }
- } else {
- bo = bo_calloc();
- if (!bo)
- goto err;
-
- bo->size = bo_size;
- bo->idle = true;
-
- struct drm_i915_gem_create create = { .size = bo_size };
-
- /* All new BOs we get from the kernel are zeroed, so we don't need to
- * worry about that here.
- */
- ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
- if (ret != 0) {
- free(bo);
- goto err;
- }
-
- bo->gem_handle = create.handle;
-
- bo->bufmgr = bufmgr;
-
- bo->tiling_mode = I915_TILING_NONE;
- bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
- bo->stride = 0;
-
- if (bo_set_tiling_internal(bo, tiling_mode, stride))
- goto err_free;
-
- /* Calling set_domain() will allocate pages for the BO outside of the
- * struct mutex lock in the kernel, which is more efficient than waiting
- * to create them during the first execbuf that uses the BO.
- */
- struct drm_i915_gem_set_domain sd = {
- .handle = bo->gem_handle,
- .read_domains = I915_GEM_DOMAIN_CPU,
- .write_domain = 0,
- };
-
- if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
- goto err_free;
- }
-
- bo->name = name;
- p_atomic_set(&bo->refcount, 1);
- bo->reusable = true;
- bo->cache_coherent = bufmgr->has_llc;
- bo->index = -1;
- bo->kflags = bufmgr->initial_kflags;
-
- if ((bo->kflags & EXEC_OBJECT_PINNED) && bo->gtt_offset == 0ull) {
- bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);
-
- if (bo->gtt_offset == 0ull)
- goto err_free;
- }
-
- mtx_unlock(&bufmgr->lock);
-
- DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
- (unsigned long long) size);
-
- return bo;
-
-err_free:
- bo_free(bo);
-err:
- mtx_unlock(&bufmgr->lock);
- return NULL;
-}
-
-struct brw_bo *
-brw_bo_alloc(struct brw_bufmgr *bufmgr,
- const char *name, uint64_t size,
- enum brw_memory_zone memzone)
-{
- return bo_alloc_internal(bufmgr, name, size, memzone,
- 0, I915_TILING_NONE, 0);
-}
-
-struct brw_bo *
-brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
- uint64_t size, enum brw_memory_zone memzone,
- uint32_t tiling_mode, uint32_t pitch,
- unsigned flags)
-{
- return bo_alloc_internal(bufmgr, name, size, memzone,
- flags, tiling_mode, pitch);
-}
-
-struct brw_bo *
-brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
- int x, int y, int cpp, enum brw_memory_zone memzone,
- uint32_t tiling, uint32_t *pitch, unsigned flags)
-{
- uint64_t size;
- uint32_t stride;
- unsigned long aligned_y, height_alignment;
-
- /* If we're tiled, our allocations are in 8 or 32-row blocks,
- * so failure to align our height means that we won't allocate
- * enough pages.
- *
- * If we're untiled, we still have to align to 2 rows high
- * because the data port accesses 2x2 blocks even if the
- * bottom row isn't to be rendered, so failure to align means
- * we could walk off the end of the GTT and fault. This is
- * documented on 965, and may be the case on older chipsets
- * too so we try to be careful.
- */
- aligned_y = y;
- height_alignment = 2;
-
- if (tiling == I915_TILING_X)
- height_alignment = 8;
- else if (tiling == I915_TILING_Y)
- height_alignment = 32;
- aligned_y = ALIGN(y, height_alignment);
-
- stride = x * cpp;
- stride = bo_tile_pitch(bufmgr, stride, tiling);
- size = stride * aligned_y;
- size = bo_tile_size(bufmgr, size, tiling);
- *pitch = stride;
-
- if (tiling == I915_TILING_NONE)
- stride = 0;
-
- return bo_alloc_internal(bufmgr, name, size, memzone,
- flags, tiling, stride);
-}
-
-/**
- * Returns a brw_bo wrapping the given buffer object handle.
- *
- * This can be used when one application needs to pass a buffer object
- * to another.
- */
-struct brw_bo *
-brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
- const char *name, unsigned int handle)
-{
- struct brw_bo *bo;
-
- /* At the moment most applications only have a few named BOs.
- * For instance, in a DRI client only the render buffers passed
- * between X and the client are named. And since X returns the
- * alternating names for the front/back buffer a linear search
- * provides a sufficiently fast match.
- */
- mtx_lock(&bufmgr->lock);
- bo = hash_find_bo(bufmgr->name_table, handle);
- if (bo) {
- brw_bo_reference(bo);
- goto out;
- }
-
- struct drm_gem_open open_arg = { .name = handle };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
- if (ret != 0) {
- DBG("Couldn't reference %s handle 0x%08x: %s\n",
- name, handle, strerror(errno));
- bo = NULL;
- goto out;
- }
- /* Now see if someone has used a prime handle to get this
- * object from the kernel before by looking through the list
- * again for a matching gem_handle
- */
- bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
- if (bo) {
- brw_bo_reference(bo);
- goto out;
- }
-
- bo = bo_calloc();
- if (!bo)
- goto out;
-
- p_atomic_set(&bo->refcount, 1);
-
- bo->size = open_arg.size;
- bo->gtt_offset = 0;
- bo->bufmgr = bufmgr;
- bo->gem_handle = open_arg.handle;
- bo->name = name;
- bo->global_name = handle;
- bo->reusable = false;
- bo->external = true;
- bo->kflags = bufmgr->initial_kflags;
-
- if (bo->kflags & EXEC_OBJECT_PINNED)
- bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
-
- _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
- _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
-
- struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
- ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
- if (ret != 0)
- goto err_unref;
-
- bo->tiling_mode = get_tiling.tiling_mode;
- bo->swizzle_mode = get_tiling.swizzle_mode;
- /* XXX stride is unknown */
- DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
-
-out:
- mtx_unlock(&bufmgr->lock);
- return bo;
-
-err_unref:
- bo_free(bo);
- mtx_unlock(&bufmgr->lock);
- return NULL;
-}
-
-static void
-bo_free(struct brw_bo *bo)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- if (bo->map_cpu) {
- VG_NOACCESS(bo->map_cpu, bo->size);
- drm_munmap(bo->map_cpu, bo->size);
- }
- if (bo->map_wc) {
- VG_NOACCESS(bo->map_wc, bo->size);
- drm_munmap(bo->map_wc, bo->size);
- }
- if (bo->map_gtt) {
- VG_NOACCESS(bo->map_gtt, bo->size);
- drm_munmap(bo->map_gtt, bo->size);
- }
-
- if (bo->external) {
- struct hash_entry *entry;
-
- if (bo->global_name) {
- entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
- _mesa_hash_table_remove(bufmgr->name_table, entry);
- }
-
- entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
- _mesa_hash_table_remove(bufmgr->handle_table, entry);
- } else {
- assert(list_is_empty(&bo->exports));
- }
-
- /* Close this object */
- struct drm_gem_close close = { .handle = bo->gem_handle };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
- if (ret != 0) {
- DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
- bo->gem_handle, bo->name, strerror(errno));
- }
-
- if (bo->kflags & EXEC_OBJECT_PINNED)
- vma_free(bo->bufmgr, bo->gtt_offset, bo->size);
-
- free(bo);
-}
-
-/** Frees all cached buffers significantly older than @time. */
-static void
-cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time)
-{
- int i;
-
- if (bufmgr->time == time)
- return;
-
- for (i = 0; i < bufmgr->num_buckets; i++) {
- struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
-
- list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
- if (time - bo->free_time <= 1)
- break;
-
- list_del(&bo->head);
-
- bo_free(bo);
- }
- }
-
- bufmgr->time = time;
-}
-
-static void
-bo_unreference_final(struct brw_bo *bo, time_t time)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
- struct bo_cache_bucket *bucket;
-
- DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
-
- list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) {
- struct drm_gem_close close = { .handle = export->gem_handle };
- intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close);
-
- list_del(&export->link);
- free(export);
- }
-
- bucket = bucket_for_size(bufmgr, bo->size);
- /* Put the buffer into our internal cache for reuse if we can. */
- if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
- brw_bo_madvise(bo, I915_MADV_DONTNEED)) {
- bo->free_time = time;
-
- bo->name = NULL;
-
- list_addtail(&bo->head, &bucket->head);
- } else {
- bo_free(bo);
- }
-}
-
-void
-brw_bo_unreference(struct brw_bo *bo)
-{
- if (bo == NULL)
- return;
-
- assert(p_atomic_read(&bo->refcount) > 0);
-
- if (atomic_add_unless(&bo->refcount, -1, 1)) {
- struct brw_bufmgr *bufmgr = bo->bufmgr;
- struct timespec time;
-
- clock_gettime(CLOCK_MONOTONIC, &time);
-
- mtx_lock(&bufmgr->lock);
-
- if (p_atomic_dec_zero(&bo->refcount)) {
- bo_unreference_final(bo, time.tv_sec);
- cleanup_bo_cache(bufmgr, time.tv_sec);
- }
-
- mtx_unlock(&bufmgr->lock);
- }
-}
-
-static void
-bo_wait_with_stall_warning(struct brw_context *brw,
- struct brw_bo *bo,
- const char *action)
-{
- bool busy = brw && brw->perf_debug && !bo->idle;
- double elapsed = unlikely(busy) ? -get_time() : 0.0;
-
- brw_bo_wait_rendering(bo);
-
- if (unlikely(busy)) {
- elapsed += get_time();
- if (elapsed > 1e-5) /* 0.01ms */
- perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
- action, bo->name, elapsed * 1000);
- }
-}
-
-static void
-print_flags(unsigned flags)
-{
- if (flags & MAP_READ)
- DBG("READ ");
- if (flags & MAP_WRITE)
- DBG("WRITE ");
- if (flags & MAP_ASYNC)
- DBG("ASYNC ");
- if (flags & MAP_PERSISTENT)
- DBG("PERSISTENT ");
- if (flags & MAP_COHERENT)
- DBG("COHERENT ");
- if (flags & MAP_RAW)
- DBG("RAW ");
- DBG("\n");
-}
-
-static void *
-brw_bo_gem_mmap_legacy(struct brw_context *brw, struct brw_bo *bo, bool wc)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- struct drm_i915_gem_mmap mmap_arg = {
- .handle = bo->gem_handle,
- .size = bo->size,
- .flags = wc ? I915_MMAP_WC : 0,
- };
-
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
- if (ret != 0) {
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
- void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
-
- return map;
-}
-
-static void *
-brw_bo_gem_mmap_offset(struct brw_context *brw, struct brw_bo *bo, bool wc)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- struct drm_i915_gem_mmap_offset mmap_arg = {
- .handle = bo->gem_handle,
- .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
- };
-
- /* Get the fake offset back */
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg);
- if (ret != 0) {
- DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
-
- /* And map it */
- void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
- bufmgr->fd, mmap_arg.offset);
- if (map == MAP_FAILED) {
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
-
- return map;
-}
-
-static void *
-brw_bo_gem_mmap(struct brw_context *brw, struct brw_bo *bo, bool wc)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- if (bufmgr->has_mmap_offset)
- return brw_bo_gem_mmap_offset(brw, bo, wc);
- else
- return brw_bo_gem_mmap_legacy(brw, bo, wc);
-}
-
-static void *
-brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
- /* We disallow CPU maps for writing to non-coherent buffers, as the
- * CPU map can become invalidated when a batch is flushed out, which
- * can happen at unpredictable times. You should use WC maps instead.
- */
- assert(bo->cache_coherent || !(flags & MAP_WRITE));
-
- if (!bo->map_cpu) {
- DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);
-
- void *map = brw_bo_gem_mmap(brw, bo, false);
- VG_DEFINED(map, bo->size);
-
- if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
- VG_NOACCESS(map, bo->size);
- drm_munmap(map, bo->size);
- }
- }
- assert(bo->map_cpu);
-
- DBG("brw_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
- bo->map_cpu);
- print_flags(flags);
-
- if (!(flags & MAP_ASYNC)) {
- bo_wait_with_stall_warning(brw, bo, "CPU mapping");
- }
-
- if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
- /* If we're reusing an existing CPU mapping, the CPU caches may
- * contain stale data from the last time we read from that mapping.
- * (With the BO cache, it might even be data from a previous buffer!)
- * Even if it's a brand new mapping, the kernel may have zeroed the
- * buffer via CPU writes.
- *
- * We need to invalidate those cachelines so that we see the latest
- * contents, and so long as we only read from the CPU mmap we do not
- * need to write those cachelines back afterwards.
- *
- * On LLC, the empirical evidence suggests that writes from the GPU
- * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
- * cachelines. (Other reads, such as the display engine, bypass the
- * LLC entirely requiring us to keep dirty pixels for the scanout
- * out of any cache.)
- */
- intel_invalidate_range(bo->map_cpu, bo->size);
- }
-
- return bo->map_cpu;
-}
-
-static void *
-brw_bo_map_wc(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- if (!bufmgr->has_mmap_wc)
- return NULL;
-
- if (!bo->map_wc) {
- DBG("brw_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
- void *map = brw_bo_gem_mmap(brw, bo, true);
- VG_DEFINED(map, bo->size);
-
- if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
- VG_NOACCESS(map, bo->size);
- drm_munmap(map, bo->size);
- }
- }
- assert(bo->map_wc);
-
- DBG("brw_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
- print_flags(flags);
-
- if (!(flags & MAP_ASYNC)) {
- bo_wait_with_stall_warning(brw, bo, "WC mapping");
- }
-
- return bo->map_wc;
-}
-
-/**
- * Perform an uncached mapping via the GTT.
- *
- * Write access through the GTT is not quite fully coherent. On low power
- * systems especially, like modern Atoms, we can observe reads from RAM before
- * the write via GTT has landed. A write memory barrier that flushes the Write
- * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
- * read after the write as the GTT write suffers a small delay through the GTT
- * indirection. The kernel uses an uncached mmio read to ensure the GTT write
- * is ordered with reads (either by the GPU, WB or WC) and unconditionally
- * flushes prior to execbuf submission. However, if we are not informing the
- * kernel about our GTT writes, it will not flush before earlier access, such
- * as when using the cmdparser. Similarly, we need to be careful if we should
- * ever issue a CPU read immediately following a GTT write.
- *
- * Telling the kernel about write access also has one more important
- * side-effect. Upon receiving notification about the write, it cancels any
- * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by
- * either SW_FINISH or DIRTYFB. The presumption is that we never write to the
- * actual scanout via a mmaping, only to a backbuffer and so all the FBC/PSR
- * tracking is handled on the buffer exchange instead.
- */
-static void *
-brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- /* Get a mapping of the buffer if we haven't before. */
- if (bo->map_gtt == NULL) {
- DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
-
- struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };
-
- /* Get the fake offset back... */
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
- if (ret != 0) {
- DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
-
- /* and mmap it. */
- void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
- MAP_SHARED, bufmgr->fd, mmap_arg.offset);
- if (map == MAP_FAILED) {
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
-
- /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
- * already intercept this mmap call. However, for consistency between
- * all the mmap paths, we mark the pointer as defined now and mark it
- * as inaccessible afterwards.
- */
- VG_DEFINED(map, bo->size);
-
- if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
- VG_NOACCESS(map, bo->size);
- drm_munmap(map, bo->size);
- }
- }
- assert(bo->map_gtt);
-
- DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
- print_flags(flags);
-
- if (!(flags & MAP_ASYNC)) {
- bo_wait_with_stall_warning(brw, bo, "GTT mapping");
- }
-
- return bo->map_gtt;
-}
-
-static bool
-can_map_cpu(struct brw_bo *bo, unsigned flags)
-{
- if (bo->cache_coherent)
- return true;
-
- /* Even if the buffer itself is not cache-coherent (such as a scanout), on
- * an LLC platform reads always are coherent (as they are performed via the
- * central system agent). It is just the writes that we need to take special
- * care to ensure that land in main memory and not stick in the CPU cache.
- */
- if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
- return true;
-
- /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
- * across batch flushes where the kernel will change cache domains of the
- * BO, invalidating continued access to the CPU mmap on non-LLC devices.
- *
- * Similarly, ASYNC typically means that the buffer will be accessed via
- * both the CPU and the GPU simultaneously. Batches may be executed that
- * use the BO even while it is mapped. While OpenGL technically disallows
- * most drawing while non-persistent mappings are active, we may still use
- * the GPU for blits or other operations, causing batches to happen at
- * inconvenient times.
- */
- if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
- return false;
-
- return !(flags & MAP_WRITE);
-}
-
-void *
-brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
- if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
- return brw_bo_map_gtt(brw, bo, flags);
-
- void *map;
-
- if (can_map_cpu(bo, flags))
- map = brw_bo_map_cpu(brw, bo, flags);
- else
- map = brw_bo_map_wc(brw, bo, flags);
-
- /* Allow the attempt to fail by falling back to the GTT where necessary.
- *
- * Not every buffer can be mmaped directly using the CPU (or WC), for
- * example buffers that wrap stolen memory or are imported from other
- * devices. For those, we have little choice but to use a GTT mmapping.
- * However, if we use a slow GTT mmapping for reads where we expected fast
- * access, that order of magnitude difference in throughput will be clearly
- * expressed by angry users.
- *
- * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
- */
- if (!map && !(flags & MAP_RAW)) {
- if (brw) {
- perf_debug("Fallback GTT mapping for %s with access flags %x\n",
- bo->name, flags);
- }
- map = brw_bo_map_gtt(brw, bo, flags);
- }
-
- return map;
-}
-
-int
-brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
- uint64_t size, const void *data)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- struct drm_i915_gem_pwrite pwrite = {
- .handle = bo->gem_handle,
- .offset = offset,
- .size = size,
- .data_ptr = (uint64_t) (uintptr_t) data,
- };
-
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
- if (ret != 0) {
- ret = -errno;
- DBG("%s:%d: Error writing data to buffer %d: "
- "(%"PRIu64" %"PRIu64") %s .\n",
- __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
- }
-
- return ret;
-}
-
-/** Waits for all GPU rendering with the object to have completed. */
-void
-brw_bo_wait_rendering(struct brw_bo *bo)
-{
- /* We require a kernel recent enough for WAIT_IOCTL support.
- * See brw_init_bufmgr()
- */
- brw_bo_wait(bo, -1);
-}
-
-/**
- * Waits on a BO for the given amount of time.
- *
- * @bo: buffer object to wait for
- * @timeout_ns: amount of time to wait in nanoseconds.
- * If value is less than 0, an infinite wait will occur.
- *
- * Returns 0 if the wait was successful ie. the last batch referencing the
- * object has completed within the allotted time. Otherwise some negative return
- * value describes the error. Of particular interest is -ETIME when the wait has
- * failed to yield the desired result.
- *
- * Similar to brw_bo_wait_rendering except a timeout parameter allows
- * the operation to give up after a certain amount of time. Another subtle
- * difference is the internal locking semantics are different (this variant does
- * not hold the lock for the duration of the wait). This makes the wait subject
- * to a larger userspace race window.
- *
- * The implementation shall wait until the object is no longer actively
- * referenced within a batch buffer at the time of the call. The wait does
- * not guard against the buffer being re-issued by another thread or via a
- * flinked handle. Userspace must make sure this race does not occur if such
- * precision is important.
- *
- * Note that some kernels have broken the promise of an infinite wait for
- * negative values; upgrade to the latest stable kernel if this is the case.
- */
-int
-brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- /* If we know it's idle, don't bother with the kernel round trip */
- if (bo->idle && !bo->external)
- return 0;
-
- struct drm_i915_gem_wait wait = {
- .bo_handle = bo->gem_handle,
- .timeout_ns = timeout_ns,
- };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
- if (ret != 0)
- return -errno;
-
- bo->idle = true;
-
- return ret;
-}
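For reference while reading the removed code, here is a minimal, hedged usage sketch of the timeout variant above; the 1 ms timeout and the helper name are assumptions for illustration, not part of the original driver:

/* Hedged sketch (assumes brw_bufmgr.h): wait up to 1 ms for the GPU to be
 * done with a BO; -ETIME means it is still busy. */
static bool
bo_idle_within_1ms(struct brw_bo *bo)
{
   int ret = brw_bo_wait(bo, 1000000 /* ns */);
   if (ret == -ETIME)
      return false;          /* still busy */
   return ret == 0;          /* idle, or false on some other error */
}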
-
-void
-brw_bufmgr_unref(struct brw_bufmgr *bufmgr)
-{
- mtx_lock(&global_bufmgr_list_mutex);
- if (p_atomic_dec_zero(&bufmgr->refcount)) {
- list_del(&bufmgr->link);
- } else {
- bufmgr = NULL;
- }
- mtx_unlock(&global_bufmgr_list_mutex);
-
- if (!bufmgr)
- return;
-
- mtx_destroy(&bufmgr->lock);
-
- /* Free any cached buffer objects we were going to reuse */
- for (int i = 0; i < bufmgr->num_buckets; i++) {
- struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
-
- list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
- list_del(&bo->head);
-
- bo_free(bo);
- }
-
- if (brw_using_softpin(bufmgr)) {
- for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
- util_dynarray_fini(&bucket->vma_list[z]);
- }
- }
- }
-
- _mesa_hash_table_destroy(bufmgr->name_table, NULL);
- _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
-
- if (brw_using_softpin(bufmgr)) {
- for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
- util_vma_heap_finish(&bufmgr->vma_allocator[z]);
- }
- }
-
- close(bufmgr->fd);
- bufmgr->fd = -1;
-
- free(bufmgr);
-}
-
-static int
-bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
- uint32_t stride)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
- struct drm_i915_gem_set_tiling set_tiling;
- int ret;
-
- if (bo->global_name == 0 &&
- tiling_mode == bo->tiling_mode && stride == bo->stride)
- return 0;
-
- memset(&set_tiling, 0, sizeof(set_tiling));
- do {
- /* set_tiling is slightly broken and overwrites the
- * input on the error path, so we have to open code
- * drmIoctl.
- */
- set_tiling.handle = bo->gem_handle;
- set_tiling.tiling_mode = tiling_mode;
- set_tiling.stride = stride;
-
- ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
- } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
- if (ret == -1)
- return -errno;
-
- bo->tiling_mode = set_tiling.tiling_mode;
- bo->swizzle_mode = set_tiling.swizzle_mode;
- bo->stride = set_tiling.stride;
- return 0;
-}
-
-int
-brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
- uint32_t *swizzle_mode)
-{
- *tiling_mode = bo->tiling_mode;
- *swizzle_mode = bo->swizzle_mode;
- return 0;
-}
-
-static struct brw_bo *
-brw_bo_gem_create_from_prime_internal(struct brw_bufmgr *bufmgr, int prime_fd,
- int tiling_mode, uint32_t stride)
-{
- uint32_t handle;
- struct brw_bo *bo;
-
- mtx_lock(&bufmgr->lock);
- int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
- if (ret) {
- DBG("create_from_prime: failed to obtain handle from fd: %s\n",
- strerror(errno));
- mtx_unlock(&bufmgr->lock);
- return NULL;
- }
-
- /*
- * See if the kernel has already returned this buffer to us. Just as
- * for named buffers, we must not create two BOs pointing at the same
- * kernel object.
- */
- bo = hash_find_bo(bufmgr->handle_table, handle);
- if (bo) {
- brw_bo_reference(bo);
- goto out;
- }
-
- bo = bo_calloc();
- if (!bo)
- goto out;
-
- p_atomic_set(&bo->refcount, 1);
-
- /* Determine size of bo. The fd-to-handle ioctl really should
- * return the size, but it doesn't. If we have kernel 3.12 or
- * later, we can lseek on the prime fd to get the size. Older
- * kernels will just fail, in which case we fall back to the
- * provided (estimated or guessed) size. */
- ret = lseek(prime_fd, 0, SEEK_END);
- if (ret != -1)
- bo->size = ret;
-
- bo->bufmgr = bufmgr;
-
- bo->gem_handle = handle;
- _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
-
- bo->name = "prime";
- bo->reusable = false;
- bo->external = true;
- bo->kflags = bufmgr->initial_kflags;
-
- if (bo->kflags & EXEC_OBJECT_PINNED) {
- assert(bo->size > 0);
- bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
- }
-
- if (tiling_mode < 0) {
- struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
- if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
- goto err;
-
- bo->tiling_mode = get_tiling.tiling_mode;
- bo->swizzle_mode = get_tiling.swizzle_mode;
- /* XXX stride is unknown */
- } else {
- bo_set_tiling_internal(bo, tiling_mode, stride);
- }
-
-out:
- mtx_unlock(&bufmgr->lock);
- return bo;
-
-err:
- bo_free(bo);
- mtx_unlock(&bufmgr->lock);
- return NULL;
-}
-
-struct brw_bo *
-brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
-{
- return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd, -1, 0);
-}
-
-struct brw_bo *
-brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, int prime_fd,
- uint32_t tiling_mode, uint32_t stride)
-{
- assert(tiling_mode == I915_TILING_NONE ||
- tiling_mode == I915_TILING_X ||
- tiling_mode == I915_TILING_Y);
-
- return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd,
- tiling_mode, stride);
-}
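As a hedged usage sketch (not taken from the removed sources), importing a dma-buf received from another process or device might look like the following; the helper name, the X-tiling choice, and the 4096-byte stride are assumptions for illustration:

/* Hedged sketch (assumes brw_bufmgr.h): import a dma-buf fd as an X-tiled
 * BO.  Returns NULL on failure. */
static struct brw_bo *
import_xtiled_dmabuf(struct brw_bufmgr *bufmgr, int dmabuf_fd)
{
   return brw_bo_gem_create_from_prime_tiled(bufmgr, dmabuf_fd,
                                             I915_TILING_X,
                                             4096 /* stride, assumed */);
}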
-
-static void
-brw_bo_make_external(struct brw_bo *bo)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- if (!bo->external) {
- mtx_lock(&bufmgr->lock);
- if (!bo->external) {
- _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
- bo->external = true;
- }
- mtx_unlock(&bufmgr->lock);
- }
-}
-
-int
-brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- brw_bo_make_external(bo);
-
- if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
- DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
- return -errno;
-
- bo->reusable = false;
-
- return 0;
-}
-
-uint32_t
-brw_bo_export_gem_handle(struct brw_bo *bo)
-{
- brw_bo_make_external(bo);
-
- return bo->gem_handle;
-}
-
-int
-brw_bo_flink(struct brw_bo *bo, uint32_t *name)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- if (!bo->global_name) {
- struct drm_gem_flink flink = { .handle = bo->gem_handle };
-
- if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
- return -errno;
-
- brw_bo_make_external(bo);
- mtx_lock(&bufmgr->lock);
- if (!bo->global_name) {
- bo->global_name = flink.name;
- _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
- }
- mtx_unlock(&bufmgr->lock);
-
- bo->reusable = false;
- }
-
- *name = bo->global_name;
- return 0;
-}
-
-int
-brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
- uint32_t *out_handle)
-{
- struct brw_bufmgr *bufmgr = bo->bufmgr;
-
- /* Only add the new GEM handle to the list of exports if it belongs to a
- * different GEM device. Otherwise we might close the same buffer multiple
- * times.
- */
- int ret = os_same_file_description(drm_fd, bufmgr->fd);
- WARN_ONCE(ret < 0,
- "Kernel has no file descriptor comparison support: %s\n",
- strerror(errno));
- if (ret == 0) {
- *out_handle = brw_bo_export_gem_handle(bo);
- return 0;
- }
-
- struct bo_export *export = calloc(1, sizeof(*export));
- if (!export)
- return -ENOMEM;
-
- export->drm_fd = drm_fd;
-
- int dmabuf_fd = -1;
- int err = brw_bo_gem_export_to_prime(bo, &dmabuf_fd);
- if (err) {
- free(export);
- return err;
- }
-
- mtx_lock(&bufmgr->lock);
- err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
- close(dmabuf_fd);
- if (err) {
- mtx_unlock(&bufmgr->lock);
- free(export);
- return err;
- }
-
- bool found = false;
- list_for_each_entry(struct bo_export, iter, &bo->exports, link) {
- if (iter->drm_fd != drm_fd)
- continue;
- /* Here we assume that for a given DRM fd, we'll always get back the
- * same GEM handle for a given buffer.
- */
- assert(iter->gem_handle == export->gem_handle);
- free(export);
- export = iter;
- found = true;
- break;
- }
- if (!found)
- list_addtail(&export->link, &bo->exports);
-
- mtx_unlock(&bufmgr->lock);
-
- *out_handle = export->gem_handle;
-
- return 0;
-}
-
-static void
-add_bucket(struct brw_bufmgr *bufmgr, int size)
-{
- unsigned int i = bufmgr->num_buckets;
-
- assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
-
- list_inithead(&bufmgr->cache_bucket[i].head);
- if (brw_using_softpin(bufmgr)) {
- for (int z = 0; z < BRW_MEMZONE_COUNT; z++)
- util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[z], NULL);
- }
- bufmgr->cache_bucket[i].size = size;
- bufmgr->num_buckets++;
-
- assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
- assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
- assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
-}
-
-static void
-init_cache_buckets(struct brw_bufmgr *bufmgr)
-{
- uint64_t size, cache_max_size = 64 * 1024 * 1024;
-
- /* OK, so power-of-two buckets were too wasteful of memory.
- * Give 3 other sizes between each power of two, to hopefully
- * cover things accurately enough. (The alternative is
- * probably to just go for exact matching of sizes, and assume
- * that for things like composited window resize the tiled
- * width/height alignment and rounding of sizes to pages will
- * get us useful cache hit rates anyway)
- */
- add_bucket(bufmgr, PAGE_SIZE);
- add_bucket(bufmgr, PAGE_SIZE * 2);
- add_bucket(bufmgr, PAGE_SIZE * 3);
-
- /* Initialize the linked lists for BO reuse cache. */
- for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
- add_bucket(bufmgr, size);
-
- add_bucket(bufmgr, size + size * 1 / 4);
- add_bucket(bufmgr, size + size * 2 / 4);
- add_bucket(bufmgr, size + size * 3 / 4);
- }
-}
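To make the bucket layout above concrete, here is a standalone sketch that prints the sizes the loop generates, assuming a 4 KiB page size; it only mirrors the arithmetic above and is not part of the removed code:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
   const uint64_t page = 4096, cache_max = 64ull * 1024 * 1024;

   /* The three fixed small buckets: 4 KiB, 8 KiB, 12 KiB. */
   printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", page, page * 2, page * 3);

   /* Then four buckets per power-of-two step: 16K, 20K, 24K, 28K, 32K, ... */
   for (uint64_t size = 4 * page; size <= cache_max; size *= 2) {
      printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
             size, size + size / 4, size + size / 2, size + size * 3 / 4);
   }
   return 0;
}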
-
-uint32_t
-brw_create_hw_context(struct brw_bufmgr *bufmgr)
-{
- struct drm_i915_gem_context_create create = { };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
- if (ret != 0) {
- DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
- return 0;
- }
-
- return create.ctx_id;
-}
-
-int
-brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
- uint32_t ctx_id,
- int priority)
-{
- struct drm_i915_gem_context_param p = {
- .ctx_id = ctx_id,
- .param = I915_CONTEXT_PARAM_PRIORITY,
- .value = priority,
- };
- int err;
-
- err = 0;
- if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
- err = -errno;
-
- return err;
-}
-
-void
-brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
-{
- struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
-
- if (ctx_id != 0 &&
- drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
- fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
- strerror(errno));
- }
-}
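For orientation, a hedged sketch of the context lifecycle exposed by the three functions above; the helper name and the priority value 512 are assumptions (elevated priorities typically require CAP_SYS_NICE), not something the original code does:

/* Hedged sketch (assumes brw_bufmgr.h): create a hardware context, request
 * an elevated priority, then destroy the context again. */
static void
context_priority_example(struct brw_bufmgr *bufmgr)
{
   uint32_t ctx_id = brw_create_hw_context(bufmgr);
   if (ctx_id == 0)
      return;

   if (brw_hw_context_set_priority(bufmgr, ctx_id, 512) != 0)
      fprintf(stderr, "context priority request was not honoured\n");

   /* ... submit batches against ctx_id ... */

   brw_destroy_hw_context(bufmgr, ctx_id);
}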
-
-int
-brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
-{
- struct drm_i915_reg_read reg_read = { .offset = offset };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
-
- *result = reg_read.val;
- return ret;
-}
-
-static int
-gem_param(int fd, int name)
-{
- int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
-
- struct drm_i915_getparam gp = { .param = name, .value = &v };
- if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
- return -1;
-
- return v;
-}
-
-static int
-gem_context_getparam(int fd, uint32_t context, uint64_t param, uint64_t *value)
-{
- struct drm_i915_gem_context_param gp = {
- .ctx_id = context,
- .param = param,
- };
-
- if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp))
- return -1;
-
- *value = gp.value;
-
- return 0;
-}
-
-bool
-brw_using_softpin(struct brw_bufmgr *bufmgr)
-{
- return bufmgr->initial_kflags & EXEC_OBJECT_PINNED;
-}
-
-static struct brw_bufmgr *
-brw_bufmgr_ref(struct brw_bufmgr *bufmgr)
-{
- p_atomic_inc(&bufmgr->refcount);
- return bufmgr;
-}
-
-/**
- * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
- * and manage buffer objects.
- *
- * \param fd File descriptor of the opened DRM device.
- */
-static struct brw_bufmgr *
-brw_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
-{
- struct brw_bufmgr *bufmgr;
-
- bufmgr = calloc(1, sizeof(*bufmgr));
- if (bufmgr == NULL)
- return NULL;
-
- /* Handles to buffer objects belong to the device fd and are not
- * reference counted by the kernel. If the same fd is used by
- * multiple parties (threads sharing the same screen bufmgr, or
- * even worse the same device fd passed to multiple libraries)
- * ownership of those handles is shared by those independent parties.
- *
- * Don't do this! Ensure that each library/bufmgr has its own device
- * fd so that its namespace does not clash with another.
- */
- bufmgr->fd = os_dupfd_cloexec(fd);
- if (bufmgr->fd < 0) {
- free(bufmgr);
- return NULL;
- }
-
- p_atomic_set(&bufmgr->refcount, 1);
-
- if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
- close(bufmgr->fd);
- free(bufmgr);
- return NULL;
- }
-
- uint64_t gtt_size;
- if (gem_context_getparam(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE, &gtt_size))
- gtt_size = 0;
-
- bufmgr->has_llc = devinfo->has_llc;
- bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
- bufmgr->bo_reuse = bo_reuse;
- bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
-
- const uint64_t _4GB = 4ull << 30;
-
- /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
- const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
-
- if (devinfo->ver >= 8 && gtt_size > _4GB) {
- bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
- /* Allocate VMA in userspace if we have softpin and full PPGTT. */
- if (gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0 &&
- gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1) {
- bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
-
- util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G],
- PAGE_SIZE, _4GB_minus_1);
-
- /* Leave the last 4GB out of the high vma range, so that no state
- * base address + size can overflow 48 bits.
- */
- util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
- 1 * _4GB, gtt_size - 2 * _4GB);
- } else if (devinfo->ver >= 10) {
- /* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
- * kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
- * 4.14. Gfx10+ GVT hasn't landed yet, so it's not actually a
- * problem - but extending this requirement back to earlier gens
- * might actually mean requiring 4.14.
- */
- fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gfx10+.\n");
- close(bufmgr->fd);
- free(bufmgr);
- return NULL;
- }
- }
-
- init_cache_buckets(bufmgr);
-
- bufmgr->name_table =
- _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
- bufmgr->handle_table =
- _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
-
- return bufmgr;
-}
-
-struct brw_bufmgr *
-brw_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
-{
- struct stat st;
-
- if (fstat(fd, &st))
- return NULL;
-
- struct brw_bufmgr *bufmgr = NULL;
-
- mtx_lock(&global_bufmgr_list_mutex);
- list_for_each_entry(struct brw_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
- struct stat iter_st;
- if (fstat(iter_bufmgr->fd, &iter_st))
- continue;
-
- if (st.st_rdev == iter_st.st_rdev) {
- assert(iter_bufmgr->bo_reuse == bo_reuse);
- bufmgr = brw_bufmgr_ref(iter_bufmgr);
- goto unlock;
- }
- }
-
- bufmgr = brw_bufmgr_create(devinfo, fd, bo_reuse);
- if (bufmgr)
- list_addtail(&bufmgr->link, &global_bufmgr_list);
-
- unlock:
- mtx_unlock(&global_bufmgr_list_mutex);
-
- return bufmgr;
-}
-
-int
-brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr)
-{
- return bufmgr->fd;
-}
+++ /dev/null
-/*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-/**
- * @file brw_bufmgr.h
- *
- * Public definitions of Intel-specific bufmgr functions.
- */
-
-#ifndef BRW_BUFMGR_H
-#define BRW_BUFMGR_H
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <time.h>
-
-#include "c11/threads.h"
-#include "util/u_atomic.h"
-#include "util/list.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-struct intel_device_info;
-struct brw_context;
-
-/**
- * Memory zones. When allocating a buffer, you can request that it is
- * placed into a specific region of the virtual address space (PPGTT).
- *
- * Most buffers can go anywhere (BRW_MEMZONE_OTHER). Some buffers are
- * accessed via an offset from a base address. STATE_BASE_ADDRESS has
- * a maximum 4GB size for each region, so we need to restrict those
- * buffers to be within 4GB of the base. Each memory zone corresponds
- * to a particular base address.
- *
- * Currently, i965 partitions the address space into two regions:
- *
- * - Low 4GB
- * - Full 48-bit address space
- *
- * Eventually, we hope to carve out 4GB of VMA for each base address.
- */
-enum brw_memory_zone {
- BRW_MEMZONE_LOW_4G,
- BRW_MEMZONE_OTHER,
-
- /* Shaders - Instruction State Base Address */
- BRW_MEMZONE_SHADER = BRW_MEMZONE_LOW_4G,
-
- /* Scratch - General State Base Address */
- BRW_MEMZONE_SCRATCH = BRW_MEMZONE_LOW_4G,
-
- /* Surface State Base Address */
- BRW_MEMZONE_SURFACE = BRW_MEMZONE_LOW_4G,
-
- /* Dynamic State Base Address */
- BRW_MEMZONE_DYNAMIC = BRW_MEMZONE_LOW_4G,
-};
-
-#define BRW_MEMZONE_COUNT (BRW_MEMZONE_OTHER + 1)
-
-struct brw_bo {
- /**
- * Size in bytes of the buffer object.
- *
- * The size may be larger than the size originally requested for the
- * allocation, such as being aligned to page size.
- */
- uint64_t size;
-
- /** Buffer manager context associated with this buffer object */
- struct brw_bufmgr *bufmgr;
-
- /** The GEM handle for this buffer object. */
- uint32_t gem_handle;
-
- /**
- * Offset of the buffer inside the Graphics Translation Table.
- *
- * This is effectively our GPU address for the buffer and we use it
- * as our base for all state pointers into the buffer. However, since the
- * kernel may be forced to move it around during the course of the
- * buffer's lifetime, we can only know where the buffer was on the last
- * execbuf. We presume, and are usually right, that the buffer will not
- * move and so we use that last offset for the next batch and by doing
- * so we can avoid having the kernel perform a relocation fixup pass as
- * our pointers inside the batch will be using the correct base offset.
- *
- * Since we do use it as a base address for the next batch of pointers,
- * the kernel treats our offset as a request, and if possible will
- * arrange for the buffer to be placed at that address (trying to balance
- * the cost of buffer migration versus the cost of performing
- * relocations). Furthermore, by specifying EXEC_OBJECT_PINNED we can force
- * the kernel to place the buffer at our chosen offset, or to report a
- * failure if we specified a conflicting offset.
- *
- * Note the GTT may be either per context, or shared globally across the
- * system. On a shared system, our buffers have to contend for address
- * space with both aperture mappings and framebuffers and so are more
- * likely to be moved. On a full ppGTT system, each batch exists in its
- * own GTT, and so each buffer may have its own offset within each
- * context.
- */
- uint64_t gtt_offset;
-
- /**
- * The validation list index for this buffer, or -1 when not in a batch.
- * Note that a single buffer may be in multiple batches (contexts), and
- * this is a global field, which refers to the last batch using the BO.
- * It should not be considered authoritative, but can be used to avoid a
- * linear walk of the validation list in the common case by guessing that
- * exec_bos[bo->index] == bo and confirming whether that's the case.
- */
- unsigned index;
-
- /**
- * Boolean of whether the GPU is definitely not accessing the buffer.
- *
- * This is only valid when reusable, since non-reusable
- * buffers are those that have been shared with other
- * processes, so we don't know their state.
- */
- bool idle;
-
- int refcount;
- const char *name;
-
- uint64_t kflags;
-
- /**
- * Kernel-assigned global name for this object.
- *
- * List contains both flink named and prime fd'd objects
- */
- unsigned int global_name;
-
- /**
- * Current tiling mode
- */
- uint32_t tiling_mode;
- uint32_t swizzle_mode;
- uint32_t stride;
-
- time_t free_time;
-
- /** Mapped address for the buffer, saved across map/unmap cycles */
- void *map_cpu;
- /** GTT virtual address for the buffer, saved across map/unmap cycles */
- void *map_gtt;
- /** WC CPU address for the buffer, saved across map/unmap cycles */
- void *map_wc;
-
- /** BO cache list */
- struct list_head head;
-
- /**
- * List of GEM handle exports of this buffer (bo_export).
- *
- * Hold bufmgr->lock when using this list.
- */
- struct list_head exports;
-
- /**
- * Boolean of whether this buffer can be re-used
- */
- bool reusable;
-
- /**
- * Boolean of whether this buffer has been shared with an external client.
- */
- bool external;
-
- /**
- * Boolean of whether this buffer is cache coherent
- */
- bool cache_coherent;
-};
-
-#define BO_ALLOC_BUSY (1<<0)
-#define BO_ALLOC_ZEROED (1<<1)
-
-/**
- * Allocate a buffer object.
- *
- * Buffer objects are not necessarily initially mapped into CPU virtual
- * address space or graphics device aperture. They must be mapped
- * using brw_bo_map() to be used by the CPU.
- */
-struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
- uint64_t size, enum brw_memory_zone memzone);
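A hedged sketch of choosing a memory zone with the interface above; the helper name, buffer names, and sizes are illustrative assumptions. Shader kernels must stay within 4GB of Instruction State Base Address, so they use the low-4G shader zone, while unrestricted buffers can use BRW_MEMZONE_OTHER:

/* Hedged sketch (assumes brw_bufmgr.h). */
static void
memzone_alloc_example(struct brw_bufmgr *bufmgr)
{
   struct brw_bo *shader_bo =
      brw_bo_alloc(bufmgr, "shader kernel", 8192, BRW_MEMZONE_SHADER);
   struct brw_bo *other_bo =
      brw_bo_alloc(bufmgr, "misc data", 4096, BRW_MEMZONE_OTHER);

   brw_bo_unreference(other_bo);
   brw_bo_unreference(shader_bo);
}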
-
-/**
- * Allocate a tiled buffer object.
- *
- * Alignment for tiled objects is set automatically; the 'flags'
- * argument provides a hint about how the object will be used initially.
- *
- * Valid tiling formats are:
- * I915_TILING_NONE
- * I915_TILING_X
- * I915_TILING_Y
- */
-struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
- const char *name,
- uint64_t size,
- enum brw_memory_zone memzone,
- uint32_t tiling_mode,
- uint32_t pitch,
- unsigned flags);
-
-/**
- * Allocate a tiled buffer object.
- *
- * Alignment for tiled objects is set automatically; the 'flags'
- * argument provides a hint about how the object will be used initially.
- *
- * Valid tiling formats are:
- * I915_TILING_NONE
- * I915_TILING_X
- * I915_TILING_Y
- *
- * Note the tiling format may be rejected; callers should check the
- * 'tiling_mode' field on return, as well as the pitch value, which
- * may have been rounded up to accommodate tiling restrictions.
- */
-struct brw_bo *brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr,
- const char *name,
- int x, int y, int cpp,
- enum brw_memory_zone memzone,
- uint32_t tiling_mode,
- uint32_t *pitch,
- unsigned flags);
-
-/** Takes a reference on a buffer object */
-static inline void
-brw_bo_reference(struct brw_bo *bo)
-{
- p_atomic_inc(&bo->refcount);
-}
-
-/**
- * Releases a reference on a buffer object, freeing the data if
- * no references remain.
- */
-void brw_bo_unreference(struct brw_bo *bo);
-
-/* Must match MapBufferRange interface (for convenience) */
-#define MAP_READ GL_MAP_READ_BIT
-#define MAP_WRITE GL_MAP_WRITE_BIT
-#define MAP_ASYNC GL_MAP_UNSYNCHRONIZED_BIT
-#define MAP_PERSISTENT GL_MAP_PERSISTENT_BIT
-#define MAP_COHERENT GL_MAP_COHERENT_BIT
-/* internal */
-#define MAP_INTERNAL_MASK (0xffu << 24)
-#define MAP_RAW (0x01 << 24)
-
-/**
- * Maps the buffer into userspace.
- *
- * This function will block waiting for any existing execution on the
- * buffer to complete, first. The resulting mapping is returned.
- */
-MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
-
-/**
- * Reduces the refcount on the userspace mapping of the buffer
- * object.
- */
-static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; }
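A minimal, hedged sketch of the map/write/unmap pattern using the flags above; the helper name and the brw/bo/data/size parameters are assumed to come from the caller and are not part of the removed header:

/* Hedged sketch (assumes brw_bufmgr.h and <string.h>): upload data through
 * a WRITE mapping. */
static bool
upload_through_map(struct brw_context *brw, struct brw_bo *bo,
                   const void *data, size_t size)
{
   void *map = brw_bo_map(brw, bo, MAP_WRITE);
   if (map == NULL)
      return false;

   memcpy(map, data, size);
   brw_bo_unmap(bo);   /* no kernel call; mappings persist across map/unmap */
   return true;
}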
-
-/** Write data into an object. */
-int brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
- uint64_t size, const void *data);
-/**
- * Waits for rendering to an object by the GPU to have completed.
- *
- * This is not required for any access to the BO by bo_map,
- * bo_subdata, etc. It is merely a way for the driver to implement
- * glFinish.
- */
-void brw_bo_wait_rendering(struct brw_bo *bo);
-
-/**
- * Unref a buffer manager instance.
- */
-void brw_bufmgr_unref(struct brw_bufmgr *bufmgr);
-
-/**
- * Get the current tiling (and resulting swizzling) mode for the bo.
- *
- * \param buf Buffer to get tiling mode for
- * \param tiling_mode returned tiling mode
- * \param swizzle_mode returned swizzling mode
- */
-int brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
- uint32_t *swizzle_mode);
-
-/**
- * Create a visible name for a buffer which can be used by other apps
- *
- * \param buf Buffer to create a name for
- * \param name Returned name
- */
-int brw_bo_flink(struct brw_bo *bo, uint32_t *name);
-
-/**
- * Returns 1 if mapping the buffer for write could cause the process
- * to block, due to the object being active in the GPU.
- */
-int brw_bo_busy(struct brw_bo *bo);
-
-/**
- * Specify the volatility of the buffer.
- * \param bo Buffer to set the purgeable status of
- * \param madv The purgeable status
- *
- * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
- * reclaimed under memory pressure. If you subsequently require the buffer,
- * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
- *
- * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
- * marked as I915_MADV_DONTNEED.
- */
-int brw_bo_madvise(struct brw_bo *bo, int madv);
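As a hedged usage sketch of the madvise interface just described, a cache might park an idle BO as purgeable and check on reuse whether the kernel discarded it; the helper name is an assumption for illustration:

/* Hedged sketch (assumes brw_bufmgr.h and i915_drm.h). */
static bool
reclaim_purgeable_bo(struct brw_bo *bo)
{
   brw_bo_madvise(bo, I915_MADV_DONTNEED);

   /* ... time passes; memory pressure may purge the pages here ... */

   if (brw_bo_madvise(bo, I915_MADV_WILLNEED) == 0) {
      /* Pages were discarded; contents must be reinitialized. */
      return false;
   }
   return true;
}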
-
-struct brw_bufmgr *brw_bufmgr_get_for_fd(struct intel_device_info *devinfo,
- int fd, bool bo_reuse);
-
-struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
- const char *name,
- unsigned int handle);
-
-int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
-
-uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr);
-
-int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
- uint32_t ctx_id,
- int priority);
-
-void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id);
-
-int brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr);
-
-int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd);
-struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr,
- int prime_fd);
-struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr,
- int prime_fd,
- uint32_t tiling_mode,
- uint32_t stride);
-
-uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);
-
-/**
- * Exports a bo as a GEM handle into a given DRM file descriptor
- * \param bo Buffer to export
- * \param drm_fd File descriptor where the new handle is created
- * \param out_handle Pointer to store the new handle
- *
- * Returns 0 if the buffer was successfully exported, a non-zero error code
- * otherwise.
- */
-int brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
- uint32_t *out_handle);
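For completeness, a hedged sketch of the simpler dma-buf export path declared above; the helper name and the hand-off comment are assumptions, not part of the removed header:

/* Hedged sketch (assumes brw_bufmgr.h and <unistd.h>): export a BO as a
 * dma-buf fd so another device or process can import it. */
static int
share_bo_as_dmabuf(struct brw_bo *bo)
{
   int dmabuf_fd = -1;
   if (brw_bo_gem_export_to_prime(bo, &dmabuf_fd) != 0)
      return -1;

   /* Hand dmabuf_fd to the consumer (e.g. over a Unix socket, which dups
    * the descriptor), then close our copy. */
   close(dmabuf_fd);
   return 0;
}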
-
-int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset,
- uint64_t *result);
-
-bool brw_using_softpin(struct brw_bufmgr *bufmgr);
-
-/** @{ */
-
-#if defined(__cplusplus)
-}
-#endif
-#endif /* BRW_BUFMGR_H */
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * Copyright 2009, 2012 Intel Corporation.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "main/condrender.h"
-#include "swrast/swrast.h"
-#include "drivers/common/meta.h"
-
-#include "brw_batch.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-
-#include "brw_context.h"
-#include "brw_blorp.h"
-#include "brw_defines.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLIT
-
-static const char *buffer_names[] = {
- [BUFFER_FRONT_LEFT] = "front",
- [BUFFER_BACK_LEFT] = "back",
- [BUFFER_FRONT_RIGHT] = "front right",
- [BUFFER_BACK_RIGHT] = "back right",
- [BUFFER_DEPTH] = "depth",
- [BUFFER_STENCIL] = "stencil",
- [BUFFER_ACCUM] = "accum",
- [BUFFER_COLOR0] = "color0",
- [BUFFER_COLOR1] = "color1",
- [BUFFER_COLOR2] = "color2",
- [BUFFER_COLOR3] = "color3",
- [BUFFER_COLOR4] = "color4",
- [BUFFER_COLOR5] = "color5",
- [BUFFER_COLOR6] = "color6",
- [BUFFER_COLOR7] = "color7",
-};
-
-static void
-debug_mask(const char *name, GLbitfield mask)
-{
- GLuint i;
-
- if (INTEL_DEBUG(DEBUG_BLIT)) {
- DBG("%s clear:", name);
- for (i = 0; i < BUFFER_COUNT; i++) {
- if (mask & (1 << i))
- DBG(" %s", buffer_names[i]);
- }
- DBG("\n");
- }
-}
-
-/**
- * Returns true if the scissor is a noop (cuts out nothing).
- */
-static bool
-noop_scissor(struct gl_framebuffer *fb)
-{
- return fb->_Xmin <= 0 &&
- fb->_Ymin <= 0 &&
- fb->_Xmax >= fb->Width &&
- fb->_Ymax >= fb->Height;
-}
-
-/**
- * Implements fast depth clears on gfx6+.
- *
- * Fast clears basically work by setting a flag in each of the subspans
- * represented in the HiZ buffer that says "When you need the depth values for
- * this subspan, it's the hardware's current clear value." Then later rendering
- * can just use the static clear value instead of referencing memory.
- *
- * The tricky part of the implementation is that you have to have the clear
- * value that was used on the depth buffer in place for all further rendering,
- * at least until a resolve to the real depth buffer happens.
- */
-static bool
-brw_fast_clear_depth(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- struct brw_renderbuffer *depth_irb =
- brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_mipmap_tree *mt = depth_irb->mt;
- struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
- return false;
-
- if (devinfo->ver < 6)
- return false;
-
- if (!brw_renderbuffer_has_hiz(depth_irb))
- return false;
-
- /* We only handle full buffer clears -- otherwise you'd have to track whether
- * a previous clear had happened at a different clear value and resolve it
- * first.
- */
- if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) {
- perf_debug("Failed to fast clear %dx%d depth because of scissors. "
- "Possible 5%% performance win if avoided.\n",
- mt->surf.logical_level0_px.width,
- mt->surf.logical_level0_px.height);
- return false;
- }
-
- switch (mt->format) {
- case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
- case MESA_FORMAT_Z24_UNORM_S8_UINT:
- /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
- *
- * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
- * enabled (the legacy method of clearing must be performed):
- *
- * - If the depth buffer format is D32_FLOAT_S8X24_UINT or
- * D24_UNORM_S8_UINT.
- */
- return false;
-
- case MESA_FORMAT_Z_UNORM16:
- /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
- *
- * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
- * enabled (the legacy method of clearing must be performed):
- *
- * - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
- * width of the map (LOD0) is not multiple of 16, fast clear
- * optimization must be disabled.
- */
- if (devinfo->ver == 6 &&
- (minify(mt->surf.phys_level0_sa.width,
- depth_irb->mt_level - mt->first_level) % 16) != 0)
- return false;
- break;
-
- default:
- break;
- }
-
- /* Quantize the clear value to what can be stored in the actual depth
- * buffer. This makes the following check more accurate because it now
- * checks if the actual depth bits will match. It also prevents us from
- * getting a too-accurate depth value during depth testing or when sampling
- * with HiZ enabled.
- */
- float clear_value =
- mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear :
- _mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax);
-
- const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1;
-
- /* If we're clearing to a new clear value, then we need to resolve any clear
- * flags out of the HiZ buffer into the real depth buffer.
- */
- if (mt->fast_clear_color.f32[0] != clear_value) {
- for (uint32_t level = mt->first_level; level <= mt->last_level; level++) {
- if (!brw_miptree_level_has_hiz(mt, level))
- continue;
-
- const unsigned level_layers = brw_get_num_logical_layers(mt, level);
-
- for (uint32_t layer = 0; layer < level_layers; layer++) {
- if (level == depth_irb->mt_level &&
- layer >= depth_irb->mt_layer &&
- layer < depth_irb->mt_layer + num_layers) {
- /* We're going to clear this layer anyway. Leave it alone. */
- continue;
- }
-
- enum isl_aux_state aux_state =
- brw_miptree_get_aux_state(mt, level, layer);
-
- if (aux_state != ISL_AUX_STATE_CLEAR &&
- aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
- /* This slice doesn't have any fast-cleared bits. */
- continue;
- }
-
- /* If we got here, then the level may have fast-clear bits that
- * use the old clear value. We need to do a depth resolve to get
- * rid of their use of the clear value before we can change it.
- * Fortunately, few applications ever change their depth clear
- * value so this shouldn't happen often.
- */
- brw_hiz_exec(brw, mt, level, layer, 1, ISL_AUX_OP_FULL_RESOLVE);
- brw_miptree_set_aux_state(brw, mt, level, layer, 1,
- ISL_AUX_STATE_RESOLVED);
- }
- }
-
- const union isl_color_value clear_color = { .f32 = {clear_value, } };
- brw_miptree_set_clear_color(brw, mt, clear_color);
- }
-
- for (unsigned a = 0; a < num_layers; a++) {
- enum isl_aux_state aux_state =
- brw_miptree_get_aux_state(mt, depth_irb->mt_level,
- depth_irb->mt_layer + a);
-
- if (aux_state != ISL_AUX_STATE_CLEAR) {
- brw_hiz_exec(brw, mt, depth_irb->mt_level,
- depth_irb->mt_layer + a, 1,
- ISL_AUX_OP_FAST_CLEAR);
- }
- }
-
- brw_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
- depth_irb->mt_layer, num_layers,
- ISL_AUX_STATE_CLEAR);
- return true;
-}
-
-/**
- * Called by ctx->Driver.Clear.
- */
-static void
-brw_clear(struct gl_context *ctx, GLbitfield mask)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- bool partial_clear = ctx->Scissor.EnableFlags && !noop_scissor(fb);
-
- if (!_mesa_check_conditional_render(ctx))
- return;
-
- if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
- brw->front_buffer_dirty = true;
- }
-
- brw_prepare_render(brw);
- brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
-
- if (mask & BUFFER_BIT_DEPTH) {
- if (brw_fast_clear_depth(ctx)) {
- DBG("fast clear: depth\n");
- mask &= ~BUFFER_BIT_DEPTH;
- }
- }
-
- if (mask & BUFFER_BITS_COLOR) {
- brw_blorp_clear_color(brw, fb, mask, partial_clear,
- ctx->Color.sRGBEnabled);
- debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
- mask &= ~BUFFER_BITS_COLOR;
- }
-
- if (devinfo->ver >= 6 && (mask & BUFFER_BITS_DEPTH_STENCIL)) {
- brw_blorp_clear_depth_stencil(brw, fb, mask, partial_clear);
- debug_mask("blorp depth/stencil", mask & BUFFER_BITS_DEPTH_STENCIL);
- mask &= ~BUFFER_BITS_DEPTH_STENCIL;
- }
-
- GLbitfield tri_mask = mask & (BUFFER_BIT_STENCIL |
- BUFFER_BIT_DEPTH);
-
- if (tri_mask) {
- debug_mask("tri", tri_mask);
- mask &= ~tri_mask;
- _mesa_meta_glsl_Clear(&brw->ctx, tri_mask);
- }
-
- /* Any strange buffers get passed off to swrast. The only thing that
- * should be left at this point is the accumulation buffer.
- */
- assert((mask & ~BUFFER_BIT_ACCUM) == 0);
- if (mask) {
- debug_mask("swrast", mask);
- _swrast_Clear(ctx, mask);
- }
-}
-
-
-void
-brw_init_clear_functions(struct dd_function_table *functions)
-{
- functions->Clear = brw_clear;
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-#include "main/macros.h"
-#include "main/enums.h"
-
-#include "brw_batch.h"
-
-#include "brw_defines.h"
-#include "brw_context.h"
-#include "brw_util.h"
-#include "brw_state.h"
-#include "compiler/brw_eu.h"
-
-#include "util/ralloc.h"
-
-static void
-compile_clip_prog(struct brw_context *brw, struct brw_clip_prog_key *key)
-{
- const unsigned *program;
- void *mem_ctx;
- unsigned program_size;
-
- mem_ctx = ralloc_context(NULL);
-
- struct brw_clip_prog_data prog_data;
- program = brw_compile_clip(brw->screen->compiler, mem_ctx, key, &prog_data,
- &brw->vue_map_geom_out, &program_size);
-
- brw_upload_cache(&brw->cache,
- BRW_CACHE_CLIP_PROG,
- key, sizeof(*key),
- program, program_size,
- &prog_data, sizeof(prog_data),
- &brw->clip.prog_offset, &brw->clip.prog_data);
- ralloc_free(mem_ctx);
-}
-
-/* Calculate interpolants for triangle and line rasterization.
- */
-void
-brw_upload_clip_prog(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- struct brw_clip_prog_key key;
-
- if (!brw_state_dirty(brw,
- _NEW_BUFFERS |
- _NEW_LIGHT |
- _NEW_POLYGON |
- _NEW_TRANSFORM,
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_REDUCED_PRIMITIVE |
- BRW_NEW_VUE_MAP_GEOM_OUT))
- return;
-
- memset(&key, 0, sizeof(key));
-
- /* Populate the key:
- */
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
- if (wm_prog_data) {
- key.contains_flat_varying = wm_prog_data->contains_flat_varying;
- key.contains_noperspective_varying =
- wm_prog_data->contains_noperspective_varying;
-
- STATIC_ASSERT(sizeof(key.interp_mode) ==
- sizeof(wm_prog_data->interp_mode));
- memcpy(key.interp_mode, wm_prog_data->interp_mode,
- sizeof(key.interp_mode));
- }
-
- /* BRW_NEW_REDUCED_PRIMITIVE */
- key.primitive = brw->reduced_primitive;
- /* BRW_NEW_VUE_MAP_GEOM_OUT */
- key.attrs = brw->vue_map_geom_out.slots_valid;
-
- /* _NEW_LIGHT */
- key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
- /* _NEW_TRANSFORM (also part of VUE map)*/
- if (ctx->Transform.ClipPlanesEnabled)
- key.nr_userclip = util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
-
- if (devinfo->ver == 5)
- key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP;
- else
- key.clip_mode = BRW_CLIP_MODE_NORMAL;
-
- /* _NEW_POLYGON */
- if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFlag &&
- ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
- key.clip_mode = BRW_CLIP_MODE_REJECT_ALL;
- else {
- GLuint fill_front = BRW_CLIP_FILL_MODE_CULL;
- GLuint fill_back = BRW_CLIP_FILL_MODE_CULL;
- GLuint offset_front = 0;
- GLuint offset_back = 0;
-
- if (!ctx->Polygon.CullFlag ||
- ctx->Polygon.CullFaceMode != GL_FRONT) {
- switch (ctx->Polygon.FrontMode) {
- case GL_FILL:
- fill_front = BRW_CLIP_FILL_MODE_FILL;
- offset_front = 0;
- break;
- case GL_LINE:
- fill_front = BRW_CLIP_FILL_MODE_LINE;
- offset_front = ctx->Polygon.OffsetLine;
- break;
- case GL_POINT:
- fill_front = BRW_CLIP_FILL_MODE_POINT;
- offset_front = ctx->Polygon.OffsetPoint;
- break;
- }
- }
-
- if (!ctx->Polygon.CullFlag ||
- ctx->Polygon.CullFaceMode != GL_BACK) {
- switch (ctx->Polygon.BackMode) {
- case GL_FILL:
- fill_back = BRW_CLIP_FILL_MODE_FILL;
- offset_back = 0;
- break;
- case GL_LINE:
- fill_back = BRW_CLIP_FILL_MODE_LINE;
- offset_back = ctx->Polygon.OffsetLine;
- break;
- case GL_POINT:
- fill_back = BRW_CLIP_FILL_MODE_POINT;
- offset_back = ctx->Polygon.OffsetPoint;
- break;
- }
- }
-
- if (ctx->Polygon.BackMode != GL_FILL ||
- ctx->Polygon.FrontMode != GL_FILL) {
- key.do_unfilled = 1;
-
- /* Most cases are handled by the fixed-function units. Cases where
- * one or more polygon faces are unfilled will require help:
- */
- key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED;
-
- if (offset_back || offset_front) {
- /* _NEW_POLYGON, _NEW_BUFFERS */
- key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
- key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
- key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD;
- }
-
- if (!brw->polygon_front_bit) {
- key.fill_ccw = fill_front;
- key.fill_cw = fill_back;
- key.offset_ccw = offset_front;
- key.offset_cw = offset_back;
- if (ctx->Light.Model.TwoSide &&
- key.fill_cw != BRW_CLIP_FILL_MODE_CULL)
- key.copy_bfc_cw = 1;
- } else {
- key.fill_cw = fill_front;
- key.fill_ccw = fill_back;
- key.offset_cw = offset_front;
- key.offset_ccw = offset_back;
- if (ctx->Light.Model.TwoSide &&
- key.fill_ccw != BRW_CLIP_FILL_MODE_CULL)
- key.copy_bfc_ccw = 1;
- }
- }
- }
- }
-
- if (!brw_search_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &key, sizeof(key),
- &brw->clip.prog_offset, &brw->clip.prog_data, true)) {
- compile_clip_prog( brw, &key );
- }
-}
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <sys/errno.h>
-
-#include "main/condrender.h"
-#include "main/mtypes.h"
-#include "main/state.h"
-#include "brw_context.h"
-#include "brw_draw.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_defines.h"
-
-
-static void
-brw_dispatch_compute_common(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- bool fail_next;
-
- if (!_mesa_check_conditional_render(ctx))
- return;
-
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- brw_validate_textures(brw);
-
- brw_predraw_resolve_inputs(brw, false, NULL);
-
- /* Flush the batch if the batch/state buffers are nearly full. We can
- * grow them if needed, but this is not free, so we'd like to avoid it.
- */
- brw_batch_require_space(brw, 600);
- brw_require_statebuffer_space(brw, 2500);
- brw_batch_save_state(brw);
- fail_next = brw_batch_saved_state_is_empty(brw);
-
- retry:
- brw->batch.no_wrap = true;
- brw_upload_compute_state(brw);
-
- brw->vtbl.emit_compute_walker(brw);
-
- brw->batch.no_wrap = false;
-
- if (!brw_batch_has_aperture_space(brw, 0)) {
- if (!fail_next) {
- brw_batch_reset_to_saved(brw);
- brw_batch_flush(brw);
- fail_next = true;
- goto retry;
- } else {
- int ret = brw_batch_flush(brw);
- WARN_ONCE(ret == -ENOSPC,
- "i965: Single compute shader dispatch "
- "exceeded available aperture space\n");
- }
- }
-
- /* Now that we know we haven't run out of aperture space, we can safely
- * reset the dirty bits.
- */
- brw_compute_state_finished(brw);
-
- if (brw->always_flush_batch)
- brw_batch_flush(brw);
-
- brw_program_cache_check_size(brw);
-
- /* Note: since compute shaders can't write to framebuffers, there's no need
- * to call brw_postdraw_set_buffers_need_resolve().
- */
-}
-
-static void
-brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
- struct brw_context *brw = brw_context(ctx);
-
- brw->compute.num_work_groups_bo = NULL;
- brw->compute.num_work_groups = num_groups;
- brw->compute.group_size = NULL;
- ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
-
- brw_dispatch_compute_common(ctx);
-}
-
-static void
-brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
-{
- struct brw_context *brw = brw_context(ctx);
- static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
- struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer;
- struct brw_bo *bo =
- brw_bufferobj_buffer(brw,
- brw_buffer_object(indirect_buffer),
- indirect, 3 * sizeof(GLuint), false);
-
- brw->compute.num_work_groups_bo = bo;
- brw->compute.num_work_groups_offset = indirect;
- brw->compute.num_work_groups = indirect_group_counts;
- brw->compute.group_size = NULL;
- ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
-
- brw_dispatch_compute_common(ctx);
-}
-
-static void
-brw_dispatch_compute_group_size(struct gl_context *ctx,
- const GLuint *num_groups,
- const GLuint *group_size)
-{
- struct brw_context *brw = brw_context(ctx);
-
- brw->compute.num_work_groups_bo = NULL;
- brw->compute.num_work_groups = num_groups;
- brw->compute.group_size = group_size;
- ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
-
- brw_dispatch_compute_common(ctx);
-}
-
-void
-brw_init_compute_functions(struct dd_function_table *functions)
-{
- functions->DispatchCompute = brw_dispatch_compute;
- functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
- functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size;
-}
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Neil Roberts <neil@linux.intel.com>
- */
-
-/** @file brw_conditional_render.c
- *
- * Support for conditional rendering based on query objects
- * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+.
- */
-
-#include "main/condrender.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-static void
-set_predicate_enable(struct brw_context *brw,
- bool value)
-{
- if (value)
- brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
- else
- brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER;
-}
-
-static void
-set_predicate_for_overflow_query(struct brw_context *brw,
- struct brw_query_object *query,
- int stream_start, int count)
-{
- if (!can_do_mi_math_and_lrr(brw->screen)) {
- brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
- return;
- }
-
- brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
-
- /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
- * command when loading the values into the predicate source registers for
- * conditional rendering.
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
-
- hsw_overflow_result_to_gpr0(brw, query, count);
- brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0));
- brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
-}
-
-static void
-set_predicate_for_occlusion_query(struct brw_context *brw,
- struct brw_query_object *query)
-{
- if (!brw->predicate.supported) {
- brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
- return;
- }
-
- brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
-
- /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
- * command when loading the values into the predicate source registers for
- * conditional rendering.
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
-
- brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, 0 /* offset */);
- brw_load_register_mem64(brw, MI_PREDICATE_SRC1, query->bo, 8 /* offset */);
-}
-
-static void
-set_predicate_for_result(struct brw_context *brw,
- struct brw_query_object *query,
- bool inverted)
-{
- int load_op;
-
- assert(query->bo != NULL);
-
- switch (query->Base.Target) {
- case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
- set_predicate_for_overflow_query(brw, query, 0, 1);
- break;
- case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
- set_predicate_for_overflow_query(brw, query, 0, MAX_VERTEX_STREAMS);
- break;
- default:
- set_predicate_for_occlusion_query(brw, query);
- }
-
- if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) {
- if (inverted)
- load_op = MI_PREDICATE_LOADOP_LOAD;
- else
- load_op = MI_PREDICATE_LOADOP_LOADINV;
-
- BEGIN_BATCH(1);
- OUT_BATCH(GFX7_MI_PREDICATE |
- load_op |
- MI_PREDICATE_COMBINEOP_SET |
- MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
- ADVANCE_BATCH();
- }
-}
-
-static void
-brw_begin_conditional_render(struct gl_context *ctx,
- struct gl_query_object *q,
- GLenum mode)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *) q;
- bool inverted;
-
- switch (mode) {
- case GL_QUERY_WAIT:
- case GL_QUERY_NO_WAIT:
- case GL_QUERY_BY_REGION_WAIT:
- case GL_QUERY_BY_REGION_NO_WAIT:
- inverted = false;
- break;
- case GL_QUERY_WAIT_INVERTED:
- case GL_QUERY_NO_WAIT_INVERTED:
- case GL_QUERY_BY_REGION_WAIT_INVERTED:
- case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
- inverted = true;
- break;
- default:
- unreachable("Unexpected conditional render mode");
- }
-
- /* If there are already samples from a BLT operation or if the query object
- * is ready then we can avoid looking at the values in the buffer and just
- * decide whether to draw using the CPU without stalling.
- */
- if (query->Base.Result || query->Base.Ready)
- set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted);
- else
- set_predicate_for_result(brw, query, inverted);
-}
-
-static void
-brw_end_conditional_render(struct gl_context *ctx,
- struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
-
- /* When there is no longer a conditional render in progress it should
- * always render.
- */
- brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
-}
-
-void
-brw_init_conditional_render_functions(struct dd_function_table *functions)
-{
- functions->BeginConditionalRender = brw_begin_conditional_render;
- functions->EndConditionalRender = brw_end_conditional_render;
-}
-
-bool
-brw_check_conditional_render(struct brw_context *brw)
-{
- if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) {
- perf_debug("Conditional rendering is implemented in software and may "
- "stall.\n");
- return _mesa_check_conditional_render(&brw->ctx);
- }
-
- return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
-}
+++ /dev/null
-/*
- Copyright 2003 VMware, Inc.
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#include "compiler/nir/nir.h"
-#include "main/api_exec.h"
-#include "main/context.h"
-#include "main/fbobject.h"
-#include "main/extensions.h"
-#include "main/glthread.h"
-#include "main/macros.h"
-#include "main/points.h"
-#include "main/version.h"
-#include "main/vtxfmt.h"
-#include "main/texobj.h"
-#include "main/framebuffer.h"
-#include "main/stencil.h"
-#include "main/state.h"
-#include "main/spirv_extensions.h"
-#include "main/externalobjects.h"
-
-#include "vbo/vbo.h"
-
-#include "drivers/common/driverfuncs.h"
-#include "drivers/common/meta.h"
-#include "utils.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_blorp.h"
-#include "brw_draw.h"
-#include "brw_state.h"
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-#include "brw_pixel.h"
-#include "brw_image.h"
-#include "brw_tex.h"
-#include "brw_tex_obj.h"
-
-#include "swrast_setup/swrast_setup.h"
-#include "tnl/tnl.h"
-#include "tnl/t_pipeline.h"
-#include "util/ralloc.h"
-#include "util/debug.h"
-#include "util/disk_cache.h"
-#include "util/u_memory.h"
-#include "isl/isl.h"
-
-#include "common/intel_defines.h"
-#include "common/intel_uuid.h"
-
-#include "compiler/spirv/nir_spirv.h"
-/***************************************
- * Mesa's Driver Functions
- ***************************************/
-
-const char *const brw_vendor_string = "Intel Open Source Technology Center";
-
-const char *
-brw_get_renderer_string(const struct brw_screen *screen)
-{
- static char buf[128];
- const char *name = screen->devinfo.name;
-
- if (!name)
- name = "Intel Unknown";
-
- snprintf(buf, sizeof(buf), "Mesa DRI %s", name);
-
- return buf;
-}
-
-static const GLubyte *
-brw_get_string(struct gl_context * ctx, GLenum name)
-{
- const struct brw_context *const brw = brw_context(ctx);
-
- switch (name) {
- case GL_VENDOR:
- return (GLubyte *) brw_vendor_string;
-
- case GL_RENDERER:
- return
- (GLubyte *) brw_get_renderer_string(brw->screen);
-
- default:
- return NULL;
- }
-}
-
-static void
-brw_set_background_context(struct gl_context *ctx,
- UNUSED struct util_queue_monitoring *queue_info)
-{
- struct brw_context *brw = brw_context(ctx);
- __DRIcontext *driContext = brw->driContext;
- __DRIscreen *driScreen = driContext->driScreenPriv;
- const __DRIbackgroundCallableExtension *backgroundCallable =
- driScreen->dri2.backgroundCallable;
-
- /* Note: Mesa will only call this function if we've called
- * _mesa_enable_multithreading(). We only do that if the loader exposed
- * the __DRI_BACKGROUND_CALLABLE extension. So we know that
- * backgroundCallable is not NULL.
- */
- backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
-}
-
-static struct gl_memory_object *
-brw_new_memoryobj(struct gl_context *ctx, GLuint name)
-{
- struct brw_memory_object *memory_object = CALLOC_STRUCT(brw_memory_object);
- if (!memory_object)
- return NULL;
-
- _mesa_initialize_memory_object(ctx, &memory_object->Base, name);
- return &memory_object->Base;
-}
-
-static void
-brw_delete_memoryobj(struct gl_context *ctx, struct gl_memory_object *memObj)
-{
- struct brw_memory_object *memory_object = brw_memory_object(memObj);
- brw_bo_unreference(memory_object->bo);
- _mesa_delete_memory_object(ctx, memObj);
-}
-
-static void
-brw_import_memoryobj_fd(struct gl_context *ctx,
- struct gl_memory_object *obj,
- GLuint64 size,
- int fd)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_memory_object *memory_object = brw_memory_object(obj);
-
- memory_object->bo = brw_bo_gem_create_from_prime(brw->bufmgr, fd);
- brw_bo_reference(memory_object->bo);
- assert(memory_object->bo->size >= size);
- close(fd);
-}
-
-static void
-brw_viewport(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- __DRIcontext *driContext = brw->driContext;
-
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
- if (driContext->driDrawablePriv)
- dri2InvalidateDrawable(driContext->driDrawablePriv);
- if (driContext->driReadablePriv)
- dri2InvalidateDrawable(driContext->driReadablePriv);
- }
-}
-
-static void
-brw_update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
-{
- struct brw_context *brw = brw_context(ctx);
-
- /* Quantize the derived default number of samples
- */
- fb->DefaultGeometry._NumSamples =
- brw_quantize_num_samples(brw->screen, fb->DefaultGeometry.NumSamples);
-}
-
-static void
-brw_update_state(struct gl_context * ctx)
-{
- GLuint new_state = ctx->NewState;
- struct brw_context *brw = brw_context(ctx);
-
- if (ctx->swrast_context)
- _swrast_InvalidateState(ctx, new_state);
-
- brw->NewGLState |= new_state;
-
- if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
- _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
-
- if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
- brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
- brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
- brw->stencil_write_enabled =
- _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
- }
-
- if (new_state & _NEW_POLYGON)
- brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
-
- if (new_state & _NEW_BUFFERS) {
- brw_update_framebuffer(ctx, ctx->DrawBuffer);
- if (ctx->DrawBuffer != ctx->ReadBuffer)
- brw_update_framebuffer(ctx, ctx->ReadBuffer);
- }
-}
-
-#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
-
-static void
-brw_flush_front(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- __DRIcontext *driContext = brw->driContext;
- __DRIdrawable *driDrawable = driContext->driDrawablePriv;
- __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
-
- if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
- if (flushFront(dri_screen) && driDrawable &&
- driDrawable->loaderPrivate) {
-
- /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
- *
- * This potentially resolves both front and back buffer. It
- * is unnecessary to resolve the back, but harms nothing except
- * performance. And no one cares about front-buffer render
- * performance.
- */
- brw_resolve_for_dri2_flush(brw, driDrawable);
- brw_batch_flush(brw);
-
- flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
-
- /* We set the dirty bit in brw_prepare_render() if we're
- * front buffer rendering once we get there.
- */
- brw->front_buffer_dirty = false;
- }
- }
-}
-
-static void
-brw_display_shared_buffer(struct brw_context *brw)
-{
- __DRIcontext *dri_context = brw->driContext;
- __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
- __DRIscreen *dri_screen = brw->screen->driScrnPriv;
- int fence_fd = -1;
-
- if (!brw->is_shared_buffer_bound)
- return;
-
- if (!brw->is_shared_buffer_dirty)
- return;
-
- if (brw->screen->has_exec_fence) {
- /* This function is always called during a flush operation, so there is
- * no need to flush again here. But we want to provide a fence_fd to the
- * loader, and a redundant flush is the easiest way to acquire one.
- */
- if (brw_batch_flush_fence(brw, -1, &fence_fd))
- return;
- }
-
- dri_screen->mutableRenderBuffer.loader
- ->displaySharedBuffer(dri_drawable, fence_fd,
- dri_drawable->loaderPrivate);
- brw->is_shared_buffer_dirty = false;
-}
-
-static void
-brw_glFlush(struct gl_context *ctx, unsigned gallium_flush_flags)
-{
- struct brw_context *brw = brw_context(ctx);
-
- brw_batch_flush(brw);
- brw_flush_front(ctx);
- brw_display_shared_buffer(brw);
- brw->need_flush_throttle = true;
-}
-
-static void
-brw_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state)
-{
- struct brw_context *brw = brw_context(ctx);
-
- switch (cap) {
- case GL_BLACKHOLE_RENDER_INTEL:
- brw->frontend_noop = state;
- brw_batch_flush(brw);
- brw_batch_maybe_noop(brw);
- /* Because we started previous batches with a potential
- * MI_BATCH_BUFFER_END if NOOP was enabled, anything emitted after that
- * point never made it to the HW. So when the blackhole state changes
- * from NOOP->!NOOP, reupload the entire state.
- */
- if (!brw->frontend_noop) {
- brw->NewGLState = ~0u;
- brw->ctx.NewDriverState = ~0ull;
- }
- break;
- default:
- break;
- }
-}
-
-static void
-brw_finish(struct gl_context * ctx)
-{
- struct brw_context *brw = brw_context(ctx);
-
- brw_glFlush(ctx, 0);
-
- if (brw->batch.last_bo)
- brw_bo_wait_rendering(brw->batch.last_bo);
-}
-
-static void
-brw_get_device_uuid(struct gl_context *ctx, char *uuid)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_screen *screen = brw->screen;
-
- assert(GL_UUID_SIZE_EXT >= PIPE_UUID_SIZE);
- memset(uuid, 0, GL_UUID_SIZE_EXT);
- intel_uuid_compute_device_id((uint8_t *)uuid, &screen->isl_dev, PIPE_UUID_SIZE);
-}
-
-
-static void
-brw_get_driver_uuid(struct gl_context *ctx, char *uuid)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_screen *screen = brw->screen;
-
- assert(GL_UUID_SIZE_EXT >= PIPE_UUID_SIZE);
- memset(uuid, 0, GL_UUID_SIZE_EXT);
- intel_uuid_compute_driver_id((uint8_t *)uuid, &screen->devinfo, PIPE_UUID_SIZE);
-}
-
-static void
-brw_init_driver_functions(struct brw_context *brw,
- struct dd_function_table *functions)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- _mesa_init_driver_functions(functions);
-
- /* GLX uses DRI2 invalidate events to handle window resizing.
- * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
- * which doesn't provide a mechanism for snooping the event queues.
- *
- * So EGL still relies on viewport hacks to handle window resizing.
- * This should go away with DRI3000.
- */
- if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
- functions->Viewport = brw_viewport;
-
- functions->Enable = brw_glEnable;
- functions->Flush = brw_glFlush;
- functions->Finish = brw_finish;
- functions->GetString = brw_get_string;
- functions->UpdateState = brw_update_state;
-
- brw_init_draw_functions(functions);
- brw_init_texture_functions(functions);
- brw_init_texture_image_functions(functions);
- brw_init_texture_copy_image_functions(functions);
- brw_init_copy_image_functions(functions);
- brw_init_clear_functions(functions);
- brw_init_buffer_functions(functions);
- brw_init_pixel_functions(functions);
- brw_init_buffer_object_functions(functions);
- brw_init_syncobj_functions(functions);
- brw_init_object_purgeable_functions(functions);
-
- brw_init_frag_prog_functions(functions);
- brw_init_common_queryobj_functions(functions);
- if (devinfo->verx10 >= 75)
- hsw_init_queryobj_functions(functions);
- else if (devinfo->ver >= 6)
- gfx6_init_queryobj_functions(functions);
- else
- gfx4_init_queryobj_functions(functions);
- brw_init_compute_functions(functions);
- brw_init_conditional_render_functions(functions);
-
- functions->GenerateMipmap = brw_generate_mipmap;
-
- functions->QueryInternalFormat = brw_query_internal_format;
-
- functions->NewTransformFeedback = brw_new_transform_feedback;
- functions->DeleteTransformFeedback = brw_delete_transform_feedback;
- if (can_do_mi_math_and_lrr(brw->screen)) {
- functions->BeginTransformFeedback = hsw_begin_transform_feedback;
- functions->EndTransformFeedback = hsw_end_transform_feedback;
- functions->PauseTransformFeedback = hsw_pause_transform_feedback;
- functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
- } else if (devinfo->ver >= 7) {
- functions->BeginTransformFeedback = gfx7_begin_transform_feedback;
- functions->EndTransformFeedback = gfx7_end_transform_feedback;
- functions->PauseTransformFeedback = gfx7_pause_transform_feedback;
- functions->ResumeTransformFeedback = gfx7_resume_transform_feedback;
- functions->GetTransformFeedbackVertexCount =
- brw_get_transform_feedback_vertex_count;
- } else {
- functions->BeginTransformFeedback = brw_begin_transform_feedback;
- functions->EndTransformFeedback = brw_end_transform_feedback;
- functions->PauseTransformFeedback = brw_pause_transform_feedback;
- functions->ResumeTransformFeedback = brw_resume_transform_feedback;
- functions->GetTransformFeedbackVertexCount =
- brw_get_transform_feedback_vertex_count;
- }
-
- if (devinfo->ver >= 6)
- functions->GetSamplePosition = gfx6_get_sample_position;
-
- /* GL_ARB_get_program_binary */
- brw_program_binary_init(brw->screen->deviceID);
- functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
- functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
- functions->ProgramBinaryDeserializeDriverBlob =
- brw_deserialize_program_binary;
-
- if (brw->screen->disk_cache) {
- functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
- }
-
- functions->SetBackgroundContext = brw_set_background_context;
-
- functions->NewMemoryObject = brw_new_memoryobj;
- functions->DeleteMemoryObject = brw_delete_memoryobj;
- functions->ImportMemoryObjectFd = brw_import_memoryobj_fd;
- functions->GetDeviceUuid = brw_get_device_uuid;
- functions->GetDriverUuid = brw_get_driver_uuid;
-}
-
-static void
-brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
-
- /* The following SPIR-V capabilities are only supported on gfx7+. In theory
- * you should enable the extension only on gfx7+, but just in case let's
- * assert it.
- */
- assert(devinfo->ver >= 7);
-
- ctx->Const.SpirVCapabilities.atomic_storage = devinfo->ver >= 7;
- ctx->Const.SpirVCapabilities.draw_parameters = true;
- ctx->Const.SpirVCapabilities.float64 = devinfo->ver >= 8;
- ctx->Const.SpirVCapabilities.geometry_streams = devinfo->ver >= 7;
- ctx->Const.SpirVCapabilities.image_write_without_format = true;
- ctx->Const.SpirVCapabilities.int64 = devinfo->ver >= 8;
- ctx->Const.SpirVCapabilities.tessellation = true;
- ctx->Const.SpirVCapabilities.transform_feedback = devinfo->ver >= 7;
- ctx->Const.SpirVCapabilities.variable_pointers = true;
- ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->ver >= 8;
-}
-
-static void
-brw_initialize_context_constants(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- const struct brw_compiler *compiler = brw->screen->compiler;
-
- const bool stage_exists[MESA_SHADER_STAGES] = {
- [MESA_SHADER_VERTEX] = true,
- [MESA_SHADER_TESS_CTRL] = devinfo->ver >= 7,
- [MESA_SHADER_TESS_EVAL] = devinfo->ver >= 7,
- [MESA_SHADER_GEOMETRY] = devinfo->ver >= 6,
- [MESA_SHADER_FRAGMENT] = true,
- [MESA_SHADER_COMPUTE] =
- (_mesa_is_desktop_gl(ctx) &&
- ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
- (ctx->API == API_OPENGLES2 &&
- ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
- };
-
- unsigned num_stages = 0;
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (stage_exists[i])
- num_stages++;
- }
-
- unsigned max_samplers =
- devinfo->verx10 >= 75 ? BRW_MAX_TEX_UNIT : 16;
-
- ctx->Const.MaxDualSourceDrawBuffers = 1;
- ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
- ctx->Const.MaxCombinedShaderOutputResources =
- MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
-
- /* The timestamp register we can read for glGetTimestamp() is
- * sometimes only 32 bits, before scaling to nanoseconds (depending
- * on kernel).
- *
- * Once scaled to nanoseconds the timestamp would roll over at a
- * non-power-of-two, so an application couldn't use
- * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
- * report 36 bits and truncate at that (rolling over 5 times as
- * often as the HW counter), and when the 32-bit counter rolls
- * over, it happens to also be at a rollover in the reported value
- * from near (1<<36) to 0.
- *
- * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
- * rolls over every ~69 seconds.
- */
- ctx->Const.QueryCounterBits.Timestamp = 36;
-
- ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
- ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
- if (devinfo->ver >= 7) {
- ctx->Const.MaxRenderbufferSize = 16384;
- ctx->Const.MaxTextureSize = 16384;
- ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
- } else {
- ctx->Const.MaxRenderbufferSize = 8192;
- ctx->Const.MaxTextureSize = 8192;
- ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
- }
- ctx->Const.Max3DTextureLevels = 12; /* 2048 */
- ctx->Const.MaxArrayTextureLayers = devinfo->ver >= 7 ? 2048 : 512;
- ctx->Const.MaxTextureMbytes = 1536;
- ctx->Const.MaxTextureRectSize = devinfo->ver >= 7 ? 16384 : 8192;
- ctx->Const.MaxTextureMaxAnisotropy = 16.0;
- ctx->Const.MaxTextureLodBias = 15.0;
- ctx->Const.StripTextureBorder = true;
- if (devinfo->ver >= 7) {
- ctx->Const.MaxProgramTextureGatherComponents = 4;
- ctx->Const.MinProgramTextureGatherOffset = -32;
- ctx->Const.MaxProgramTextureGatherOffset = 31;
- } else if (devinfo->ver == 6) {
- ctx->Const.MaxProgramTextureGatherComponents = 1;
- ctx->Const.MinProgramTextureGatherOffset = -8;
- ctx->Const.MaxProgramTextureGatherOffset = 7;
- }
-
- ctx->Const.MaxUniformBlockSize = 65536;
-
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_program_constants *prog = &ctx->Const.Program[i];
-
- if (!stage_exists[i])
- continue;
-
- prog->MaxTextureImageUnits = max_samplers;
-
- prog->MaxUniformBlocks = BRW_MAX_UBO;
- prog->MaxCombinedUniformComponents =
- prog->MaxUniformComponents +
- ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
-
- prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- prog->MaxAtomicBuffers = BRW_MAX_ABO;
- prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
- prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
- }
-
- ctx->Const.MaxTextureUnits =
- MIN2(ctx->Const.MaxTextureCoordUnits,
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
-
- ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
- ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
- ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
- ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
- ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
- ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
- ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
-
-
- /* Hardware only supports a limited number of transform feedback buffers.
- * So we need to override the Mesa default (which is based only on software
- * limits).
- */
- ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
-
- /* On Gfx6, in the worst case, we use up one binding table entry per
- * transform feedback component (see comments above the definition of
- * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
- * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
- * BRW_MAX_SOL_BINDINGS.
- *
- * In "separate components" mode, we need to divide this value by
- * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
- * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
- */
- ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
- ctx->Const.MaxTransformFeedbackSeparateComponents =
- BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
-
- ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
- !can_do_mi_math_and_lrr(brw->screen);
-
- int max_samples;
- const int *msaa_modes = brw_supported_msaa_modes(brw->screen);
- const int clamp_max_samples =
- driQueryOptioni(&brw->screen->optionCache, "clamp_max_samples");
-
- if (clamp_max_samples < 0) {
- max_samples = msaa_modes[0];
- } else {
- /* Select the largest supported MSAA mode that does not exceed
- * clamp_max_samples.
- */
- max_samples = 0;
- for (int i = 0; msaa_modes[i] != 0; ++i) {
- if (msaa_modes[i] <= clamp_max_samples) {
- max_samples = msaa_modes[i];
- break;
- }
- }
- }
-
- ctx->Const.MaxSamples = max_samples;
- ctx->Const.MaxColorTextureSamples = max_samples;
- ctx->Const.MaxDepthTextureSamples = max_samples;
- ctx->Const.MaxIntegerSamples = max_samples;
- ctx->Const.MaxImageSamples = 0;
-
- ctx->Const.MinLineWidth = 1.0;
- ctx->Const.MinLineWidthAA = 1.0;
- if (devinfo->ver >= 6) {
- ctx->Const.MaxLineWidth = 7.375;
- ctx->Const.MaxLineWidthAA = 7.375;
- ctx->Const.LineWidthGranularity = 0.125;
- } else {
- ctx->Const.MaxLineWidth = 7.0;
- ctx->Const.MaxLineWidthAA = 7.0;
- ctx->Const.LineWidthGranularity = 0.5;
- }
-
- /* For non-antialiased lines, we have to round the line width to the
- * nearest whole number. Make sure that we don't advertise a line
- * width that, when rounded, will be beyond the actual hardware
- * maximum.
- */
- assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
-
- ctx->Const.MinPointSize = 1.0;
- ctx->Const.MinPointSizeAA = 1.0;
- ctx->Const.MaxPointSize = 255.0;
- ctx->Const.MaxPointSizeAA = 255.0;
- ctx->Const.PointSizeGranularity = 1.0;
-
- if (devinfo->verx10 >= 45)
- ctx->Const.MaxClipPlanes = 8;
-
- ctx->Const.GLSLFragCoordIsSysVal = true;
- ctx->Const.GLSLFrontFacingIsSysVal = true;
- ctx->Const.GLSLTessLevelsAsInputs = true;
- ctx->Const.PrimitiveRestartForPatches = true;
-
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
- MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
-
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
- MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
-
- /* Fragment shaders use real, 32-bit twos-complement integers for all
- * integer types.
- */
- ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
-
- ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
- ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
- ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
- ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
- ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
-
- /* Gfx6 converts quads to polygons at the beginning of the 3D pipeline,
- * but we're not sure how it's actually done for vertex order, which
- * affects the provoking vertex decision. Always use the last-vertex
- * convention for quad primitives, which works as expected for now.
- */
- if (devinfo->ver >= 6)
- ctx->Const.QuadsFollowProvokingVertexConvention = false;
-
- ctx->Const.NativeIntegers = true;
-
- /* Regarding the CMP instruction, the Ivybridge PRM says:
- *
- * "For each enabled channel 0b or 1b is assigned to the appropriate flag
- * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
- * 0xFFFFFFFF) is assigned to dst."
- *
- * but PRMs for earlier generations say
- *
- * "In dword format, one GRF may store up to 8 results. When the register
- * is used later as a vector of Booleans, as only LSB at each channel
- * contains meaning [sic] data, software should make sure all higher bits
- * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
- *
- * We select the representation of a true boolean uniform to be ~0, and fix
- * the results of Gen <= 5 CMP instructions with -(result & 1).
- */
- ctx->Const.UniformBooleanTrue = ~0;
-
- /* From the gfx4 PRM, volume 4 page 127:
- *
- * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
- * the base address of the first element of the surface, computed in
- * software by adding the surface base address to the byte offset of
- * the element in the buffer."
- *
- * However, unaligned accesses are slower, so enforce buffer alignment.
- *
- * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
- * restriction: the start of the buffer needs to be 32B aligned.
- */
- ctx->Const.UniformBufferOffsetAlignment = 32;
-
- /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
- * that we can safely have the CPU and GPU writing the same SSBO on
- * non-cache-coherent systems (our Atom CPUs). With UBOs, the GPU never
- * writes, so there's no problem. For an SSBO, the GPU and the CPU can
- * be updating disjoint regions of the buffer simultaneously, and that will
- * break if those regions fall within the same cacheline.
- */
- ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
- ctx->Const.TextureBufferOffsetAlignment = 16;
- ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
-
- if (devinfo->ver >= 6) {
- ctx->Const.MaxVarying = 32;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
- compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
- ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
- ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
- ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
- ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
- }
-
- /* We want the GLSL compiler to emit code that uses condition codes */
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- ctx->Const.ShaderCompilerOptions[i] =
- brw->screen->compiler->glsl_compiler_options[i];
- }
-
- if (devinfo->ver >= 7) {
- ctx->Const.MaxViewportWidth = 32768;
- ctx->Const.MaxViewportHeight = 32768;
- }
-
- /* ARB_viewport_array, OES_viewport_array */
- if (devinfo->ver >= 6) {
- ctx->Const.MaxViewports = GFX6_NUM_VIEWPORTS;
- ctx->Const.ViewportSubpixelBits = 8;
-
- /* Cast to float before negating because MaxViewportWidth is unsigned.
- */
- ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
- ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
- }
-
- /* ARB_gpu_shader5 */
- if (devinfo->ver >= 7)
- ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
-
- /* ARB_framebuffer_no_attachments */
- ctx->Const.MaxFramebufferWidth = 16384;
- ctx->Const.MaxFramebufferHeight = 16384;
- ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
- ctx->Const.MaxFramebufferSamples = max_samples;
-
- /* OES_primitive_bounding_box */
- ctx->Const.NoPrimitiveBoundingBoxOutput = true;
-
- /* TODO: We should be able to use STD430 packing by default on all hardware
- * but some piglit tests [1] currently fail on SNB when this is enabled.
- * The problem is that the message we use for uniform pulls in the vec4
- * back-end on SNB is the OWORD block load instruction, which
- * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the
- * sampler which doesn't have these restrictions.
- *
- * In the scalar back-end, we use the sampler for dynamic uniform loads and
- * pull an entire cache line at a time for constant offset loads both of
- * which support almost any alignment.
- *
- * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
- */
- if (devinfo->ver >= 7)
- ctx->Const.UseSTD430AsDefaultPacking = true;
-
- if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
- ctx->Const.AllowMappedBuffersDuringExecution = true;
-
- /* GL_ARB_get_program_binary */
- ctx->Const.NumProgramBinaryFormats = 1;
-}
-
-static void
-brw_initialize_cs_context_constants(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Maximum number of scalar compute shader invocations that can be run in
- * parallel in the same subslice assuming SIMD32 dispatch.
- */
- const unsigned max_threads = devinfo->max_cs_workgroup_threads;
- const uint32_t max_invocations = 32 * max_threads;
- ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
- ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
- ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
- ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
- ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
-
- /* Constants used for ARB_compute_variable_group_size. */
- if (devinfo->ver >= 7) {
- assert(max_invocations >= 512);
- ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations;
- ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations;
- ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations;
- ctx->Const.MaxComputeVariableGroupInvocations = max_invocations;
- }
-}
-
-/**
- * Process driconf (drirc) options, setting appropriate context flags.
- *
- * brw_init_extensions still pokes at optionCache directly, in order to
- * avoid advertising various extensions. No flags are set, so it makes
- * sense to continue doing that there.
- */
-static void
-brw_process_driconf_options(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- const driOptionCache *const options = &brw->screen->optionCache;
-
- if (INTEL_DEBUG(DEBUG_NO_HIZ)) {
- brw->has_hiz = false;
- /* On gfx6, you can only do separate stencil with HIZ. */
- if (devinfo->ver == 6)
- brw->has_separate_stencil = false;
- }
-
- if (driQueryOptionb(options, "mesa_no_error"))
- ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
-
- if (driQueryOptionb(options, "always_flush_batch")) {
- fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
- brw->always_flush_batch = true;
- }
-
- if (driQueryOptionb(options, "always_flush_cache")) {
- fprintf(stderr, "flushing GPU caches before/after each draw call\n");
- brw->always_flush_cache = true;
- }
-
- if (driQueryOptionb(options, "disable_throttling")) {
- fprintf(stderr, "disabling flush throttling\n");
- brw->disable_throttling = true;
- }
-
- brw->precompile = driQueryOptionb(&brw->screen->optionCache, "shader_precompile");
-
- if (driQueryOptionb(&brw->screen->optionCache, "precise_trig"))
- brw->screen->compiler->precise_trig = true;
-
- ctx->Const.ForceGLSLExtensionsWarn =
- driQueryOptionb(options, "force_glsl_extensions_warn");
-
- ctx->Const.ForceGLSLVersion =
- driQueryOptioni(options, "force_glsl_version");
-
- ctx->Const.DisableGLSLLineContinuations =
- driQueryOptionb(options, "disable_glsl_line_continuations");
-
- ctx->Const.AllowGLSLExtensionDirectiveMidShader =
- driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
-
- ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
- driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
-
- ctx->Const.AllowHigherCompatVersion =
- driQueryOptionb(options, "allow_higher_compat_version");
-
- ctx->Const.ForceGLSLAbsSqrt =
- driQueryOptionb(options, "force_glsl_abs_sqrt");
-
- ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init") ? 1 : 0;
-
- brw->dual_color_blend_by_location =
- driQueryOptionb(options, "dual_color_blend_by_location");
-
- ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
- driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
-
- char *vendor_str = driQueryOptionstr(options, "force_gl_vendor");
- /* not an empty string */
- if (*vendor_str)
- ctx->Const.VendorOverride = vendor_str;
-
- ctx->Const.dri_config_options_sha1 =
- ralloc_array(brw->mem_ctx, unsigned char, 20);
- driComputeOptionsSha1(&brw->screen->optionCache,
- ctx->Const.dri_config_options_sha1);
-}
-
-GLboolean
-brw_create_context(gl_api api,
- const struct gl_config *mesaVis,
- __DRIcontext *driContextPriv,
- const struct __DriverContextConfig *ctx_config,
- unsigned *dri_ctx_error,
- void *sharedContextPrivate)
-{
- struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
- struct brw_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
- const struct intel_device_info *devinfo = &screen->devinfo;
- struct dd_function_table functions;
-
- /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
- * provides us with context reset notifications.
- */
- uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
- __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
- __DRI_CTX_FLAG_NO_ERROR;
-
- if (screen->has_context_reset_notification)
- allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
-
- if (ctx_config->flags & ~allowed_flags) {
- *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
- return false;
- }
-
- if (ctx_config->attribute_mask &
- ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
- __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
- *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
- return false;
- }
-
- bool notify_reset =
- ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
- ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
-
- struct brw_context *brw = align_calloc(sizeof(struct brw_context), 16);
- if (!brw) {
- fprintf(stderr, "%s: failed to alloc context\n", __func__);
- *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
- return false;
- }
- brw->mem_ctx = ralloc_context(NULL);
- brw->perf_ctx = intel_perf_new_context(brw->mem_ctx);
-
- driContextPriv->driverPrivate = brw;
- brw->driContext = driContextPriv;
- brw->screen = screen;
- brw->bufmgr = screen->bufmgr;
-
- brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
- brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
-
- /* We don't push UBOs on IVB and earlier because the restrictions on
- * 3DSTATE_CONSTANT_* make it really annoying to use push constants
- * without dynamic state base address.
- */
- brw->can_push_ubos = devinfo->verx10 >= 75;
-
- brw->isl_dev = screen->isl_dev;
-
- brw->vs.base.stage = MESA_SHADER_VERTEX;
- brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
- brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
- brw->gs.base.stage = MESA_SHADER_GEOMETRY;
- brw->wm.base.stage = MESA_SHADER_FRAGMENT;
- brw->cs.base.stage = MESA_SHADER_COMPUTE;
-
- brw_init_driver_functions(brw, &functions);
-
- if (notify_reset)
- functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
-
- brw_process_driconf_options(brw);
-
- if (api == API_OPENGL_CORE &&
- driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
- api = API_OPENGL_COMPAT;
- }
-
- struct gl_context *ctx = &brw->ctx;
-
- if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
- *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
- fprintf(stderr, "%s: failed to init mesa context\n", __func__);
- brw_destroy_context(driContextPriv);
- return false;
- }
-
- driContextSetFlags(ctx, ctx_config->flags);
-
- /* Initialize the software rasterizer and helper modules.
- *
- * As of GL 3.1 core, the gfx4+ driver doesn't need the swrast context for
- * software fallbacks (which we have to support on legacy GL to do weird
- * glDrawPixels(), glBitmap(), and other functions).
- */
- if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
- _swrast_CreateContext(ctx);
- }
-
- _vbo_CreateContext(ctx, true);
- if (ctx->swrast_context) {
- _tnl_CreateContext(ctx);
- TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
- _swsetup_CreateContext(ctx);
-
- /* Configure swrast to match hardware characteristics: */
- _swrast_allow_pixel_fog(ctx, false);
- _swrast_allow_vertex_fog(ctx, true);
- }
-
- _mesa_meta_init(ctx);
-
- if (INTEL_DEBUG(DEBUG_PERF))
- brw->perf_debug = true;
-
- brw_initialize_cs_context_constants(brw);
- brw_initialize_context_constants(brw);
-
- ctx->Const.ResetStrategy = notify_reset
- ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
-
- /* Reinitialize the context point state. It depends on ctx->Const values. */
- _mesa_init_point(ctx);
-
- brw_fbo_init(brw);
-
- brw_batch_init(brw);
-
- /* Create a new hardware context. Using a hardware context means that
- * our GPU state will be saved/restored on context switch, allowing us
- * to assume that the GPU is in the same state we left it in.
- *
- * This is required for transform feedback buffer offsets, query objects,
- * and also allows us to reduce how much state we have to emit.
- */
- brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
- if (!brw->hw_ctx && devinfo->ver >= 6) {
- fprintf(stderr, "Failed to create hardware context.\n");
- brw_destroy_context(driContextPriv);
- return false;
- }
-
- if (brw->hw_ctx) {
- int hw_priority = INTEL_CONTEXT_MEDIUM_PRIORITY;
- if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
- switch (ctx_config->priority) {
- case __DRI_CTX_PRIORITY_LOW:
- hw_priority = INTEL_CONTEXT_LOW_PRIORITY;
- break;
- case __DRI_CTX_PRIORITY_HIGH:
- hw_priority = INTEL_CONTEXT_HIGH_PRIORITY;
- break;
- }
- }
- if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
- brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
- fprintf(stderr,
- "Failed to set priority [%d:%d] for hardware context.\n",
- ctx_config->priority, hw_priority);
- brw_destroy_context(driContextPriv);
- return false;
- }
- }
-
- if (brw_init_pipe_control(brw, devinfo)) {
- *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
- brw_destroy_context(driContextPriv);
- return false;
- }
-
- brw_upload_init(&brw->upload, brw->bufmgr, 65536);
-
- brw_init_state(brw);
-
- brw_init_extensions(ctx);
-
- brw_init_surface_formats(brw);
-
- brw_blorp_init(brw);
-
- brw->urb.size = devinfo->urb.size;
-
- if (devinfo->ver == 6)
- brw->urb.gs_present = false;
-
- brw->prim_restart.in_progress = false;
- brw->prim_restart.enable_cut_index = false;
- brw->gs.enabled = false;
- brw->clip.viewport_count = 1;
-
- brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
-
- brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
-
- ctx->VertexProgram._MaintainTnlProgram = true;
- ctx->FragmentProgram._MaintainTexEnvProgram = true;
- _mesa_reset_vertex_processing_mode(ctx);
-
- brw_draw_init( brw );
-
- if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
- /* Turn on some extra GL_ARB_debug_output generation. */
- brw->perf_debug = true;
- }
-
- if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
- ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
- ctx->Const.RobustAccess = GL_TRUE;
- }
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME))
- brw_init_shader_time(brw);
-
- _mesa_override_extensions(ctx);
- _mesa_compute_version(ctx);
-
-#ifndef NDEBUG
- /* Enforce that the version of the context that was created is at least as
- * high as the version that was advertised via GLX / EGL / whatever window
- * system.
- */
- const __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
-
- switch (api) {
- case API_OPENGL_COMPAT:
- assert(ctx->Version >= dri_screen->max_gl_compat_version);
- break;
- case API_OPENGLES:
- assert(ctx->Version >= dri_screen->max_gl_es1_version);
- break;
- case API_OPENGLES2:
- assert(ctx->Version >= dri_screen->max_gl_es2_version);
- break;
- case API_OPENGL_CORE:
- assert(ctx->Version >= dri_screen->max_gl_core_version);
- break;
- }
-#endif
-
- /* GL_ARB_gl_spirv */
- if (ctx->Extensions.ARB_gl_spirv) {
- brw_initialize_spirv_supported_capabilities(brw);
-
- if (ctx->Extensions.ARB_spirv_extensions) {
- /* GL_ARB_spirv_extensions */
- ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
- _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
- &ctx->Const.SpirVCapabilities);
- }
- }
-
- _mesa_initialize_dispatch_tables(ctx);
- _mesa_initialize_vbo_vtxfmt(ctx);
-
- if (ctx->Extensions.INTEL_performance_query)
- brw_init_performance_queries(brw);
-
- brw->ctx.Cache = brw->screen->disk_cache;
-
- if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
- driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
- /* Loader supports multithreading, and so do we. */
- _mesa_glthread_init(ctx);
- }
-
- return true;
-}
-
-void
-brw_destroy_context(__DRIcontext *driContextPriv)
-{
- struct brw_context *brw =
- (struct brw_context *) driContextPriv->driverPrivate;
- struct gl_context *ctx = &brw->ctx;
-
- GET_CURRENT_CONTEXT(curctx);
-
- if (curctx == NULL) {
- /* There is no current context, but we need one to release the
- * renderbuffer surfaces when we release the framebuffer, so
- * temporarily bind this context.
- */
- _mesa_make_current(ctx, NULL, NULL);
- }
-
- _mesa_glthread_destroy(&brw->ctx);
-
- _mesa_meta_free(&brw->ctx);
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- /* Force a report. */
- brw->shader_time.report_time = 0;
-
- brw_collect_and_report_shader_time(brw);
- brw_destroy_shader_time(brw);
- }
-
- blorp_finish(&brw->blorp);
-
- brw_destroy_state(brw);
- brw_draw_destroy(brw);
-
- brw_bo_unreference(brw->curbe.curbe_bo);
-
- brw_bo_unreference(brw->vs.base.scratch_bo);
- brw_bo_unreference(brw->tcs.base.scratch_bo);
- brw_bo_unreference(brw->tes.base.scratch_bo);
- brw_bo_unreference(brw->gs.base.scratch_bo);
- brw_bo_unreference(brw->wm.base.scratch_bo);
-
- brw_bo_unreference(brw->vs.base.push_const_bo);
- brw_bo_unreference(brw->tcs.base.push_const_bo);
- brw_bo_unreference(brw->tes.base.push_const_bo);
- brw_bo_unreference(brw->gs.base.push_const_bo);
- brw_bo_unreference(brw->wm.base.push_const_bo);
-
- brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
-
- if (ctx->swrast_context) {
- _swsetup_DestroyContext(&brw->ctx);
- _tnl_DestroyContext(&brw->ctx);
- }
- _vbo_DestroyContext(&brw->ctx);
-
- if (ctx->swrast_context)
- _swrast_DestroyContext(&brw->ctx);
-
- brw_fini_pipe_control(brw);
- brw_batch_free(&brw->batch);
-
- brw_bo_unreference(brw->throttle_batch[1]);
- brw_bo_unreference(brw->throttle_batch[0]);
- brw->throttle_batch[1] = NULL;
- brw->throttle_batch[0] = NULL;
-
- /* free the Mesa context */
- _mesa_free_context_data(&brw->ctx, true);
-
- ralloc_free(brw->mem_ctx);
- align_free(brw);
- driContextPriv->driverPrivate = NULL;
-}
-
-GLboolean
-brw_unbind_context(__DRIcontext *driContextPriv)
-{
- struct gl_context *ctx = driContextPriv->driverPrivate;
- _mesa_glthread_finish(ctx);
-
- /* Unset current context and dispatch table */
- _mesa_make_current(NULL, NULL, NULL);
-
- return true;
-}
-
-/**
- * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
- * on window system framebuffers.
- *
- * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
- * your renderbuffer can do sRGB encode, and you can flip a switch that does
- * sRGB encode if the renderbuffer can handle it. You can ask specifically
- * for a visual where you're guaranteed to be capable, but it turns out that
- * everyone just makes all their ARGB8888 visuals capable and doesn't offer
- * incapable ones, because there's no difference between the two in resources
- * used. Applications thus get built that accidentally rely on the default
- * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
- * great...
- *
- * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
- * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
- * So they removed the enable knob and made it "if the renderbuffer is sRGB
- * capable, do sRGB encode". Then, for your window system renderbuffers, you
- * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
- * and get no sRGB encode (assuming that both kinds of visual are available).
- * Thus our choice to support sRGB by default on our visuals for desktop would
- * result in broken rendering of GLES apps that aren't expecting sRGB encode.
- *
- * Unfortunately, renderbuffer setup happens before a context is created. So
- * in brw_screen.c we always set up sRGB, and here, if you're a GLES2/3
- * context (without an sRGB visual), we go turn that back off before anyone
- * finds out.
- */
-static void
-brw_gles3_srgb_workaround(struct brw_context *brw, struct gl_framebuffer *fb)
-{
- struct gl_context *ctx = &brw->ctx;
-
- if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
- return;
-
- for (int i = 0; i < BUFFER_COUNT; i++) {
- struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
-
- /* Check if sRGB was specifically asked for. */
- struct brw_renderbuffer *irb = brw_get_renderbuffer(fb, i);
- if (irb && irb->need_srgb)
- return;
-
- if (rb)
- rb->Format = _mesa_get_srgb_format_linear(rb->Format);
- }
- /* Disable sRGB from framebuffers that are not compatible. */
- fb->Visual.sRGBCapable = false;
-}
-
-GLboolean
-brw_make_current(__DRIcontext *driContextPriv,
- __DRIdrawable *driDrawPriv,
- __DRIdrawable *driReadPriv)
-{
- struct brw_context *brw;
-
- if (driContextPriv)
- brw = (struct brw_context *) driContextPriv->driverPrivate;
- else
- brw = NULL;
-
- if (driContextPriv) {
- struct gl_context *ctx = &brw->ctx;
- struct gl_framebuffer *fb, *readFb;
-
- if (driDrawPriv == NULL) {
- fb = _mesa_get_incomplete_framebuffer();
- } else {
- fb = driDrawPriv->driverPrivate;
- driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
- }
-
- if (driReadPriv == NULL) {
- readFb = _mesa_get_incomplete_framebuffer();
- } else {
- readFb = driReadPriv->driverPrivate;
- driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
- }
-
- /* The sRGB workaround changes the renderbuffer's format. We must change
- * the format before the renderbuffer's miptree gets allocated, otherwise
- * the formats of the renderbuffer and its miptree will differ.
- */
- brw_gles3_srgb_workaround(brw, fb);
- brw_gles3_srgb_workaround(brw, readFb);
-
- /* If the context viewport hasn't been initialized, force a call out to
- * the loader to get buffers so we have a drawable size for the initial
- * viewport. */
- if (!brw->ctx.ViewportInitialized)
- brw_prepare_render(brw);
-
- _mesa_make_current(ctx, fb, readFb);
- } else {
- GET_CURRENT_CONTEXT(ctx);
- _mesa_glthread_finish(ctx);
- _mesa_make_current(NULL, NULL, NULL);
- }
-
- return true;
-}
-
-void
-brw_resolve_for_dri2_flush(struct brw_context *brw,
- __DRIdrawable *drawable)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver < 6) {
- /* MSAA and fast color clear are not supported, so don't waste time
- * checking whether a resolve is needed.
- */
- return;
- }
-
- struct gl_framebuffer *fb = drawable->driverPrivate;
- struct brw_renderbuffer *rb;
-
- /* Usually, only the back buffer will need to be downsampled. However,
- * the front buffer will also need it if the user has rendered into it.
- */
- static const gl_buffer_index buffers[2] = {
- BUFFER_BACK_LEFT,
- BUFFER_FRONT_LEFT,
- };
-
- for (int i = 0; i < 2; ++i) {
- rb = brw_get_renderbuffer(fb, buffers[i]);
- if (rb == NULL || rb->mt == NULL)
- continue;
- if (rb->mt->surf.samples == 1) {
- assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
- rb->layer_count == 1);
- brw_miptree_prepare_external(brw, rb->mt);
- } else {
- brw_renderbuffer_downsample(brw, rb);
-
- /* Call prepare_external on the single-sample miptree to do any
- * needed resolves prior to handing it off to the window system.
- * This is needed in the case that rb->singlesample_mt is Y-tiled
- * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
- * this case, the MSAA resolve above will write compressed data into
- * rb->singlesample_mt.
- *
- * TODO: Some day, if we decide to care about the tiny performance
- * hit we're taking by doing the MSAA resolve and then a CCS resolve,
- * we could detect this case and just allocate the single-sampled
- * miptree without aux. However, that would be a lot of plumbing and
- * this is a rather exotic case so it's not really worth it.
- */
- brw_miptree_prepare_external(brw, rb->singlesample_mt);
- }
- }
-}
-
-static unsigned
-brw_bits_per_pixel(const struct brw_renderbuffer *rb)
-{
- return _mesa_get_format_bytes(brw_rb_format(rb)) * 8;
-}
-
-static void
-brw_query_dri2_buffers(struct brw_context *brw,
- __DRIdrawable *drawable,
- __DRIbuffer **buffers,
- int *count);
-
-static void
-brw_process_dri2_buffer(struct brw_context *brw,
- __DRIdrawable *drawable,
- __DRIbuffer *buffer,
- struct brw_renderbuffer *rb,
- const char *buffer_name);
-
-static void
-brw_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
-
-static void
-brw_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
-{
- struct gl_framebuffer *fb = drawable->driverPrivate;
- struct brw_renderbuffer *rb;
- __DRIbuffer *buffers = NULL;
- int count;
- const char *region_name;
-
- /* Set this up front, so that in case our buffers get invalidated
- * while we're getting new buffers, we don't clobber the stamp and
- * thus ignore the invalidate. */
- drawable->lastStamp = drawable->dri2.stamp;
-
- if (INTEL_DEBUG(DEBUG_DRI))
- fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-
- brw_query_dri2_buffers(brw, drawable, &buffers, &count);
-
- if (buffers == NULL)
- return;
-
- for (int i = 0; i < count; i++) {
- switch (buffers[i].attachment) {
- case __DRI_BUFFER_FRONT_LEFT:
- rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
- region_name = "dri2 front buffer";
- break;
-
- case __DRI_BUFFER_FAKE_FRONT_LEFT:
- rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
- region_name = "dri2 fake front buffer";
- break;
-
- case __DRI_BUFFER_BACK_LEFT:
- rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT);
- region_name = "dri2 back buffer";
- break;
-
- case __DRI_BUFFER_DEPTH:
- case __DRI_BUFFER_HIZ:
- case __DRI_BUFFER_DEPTH_STENCIL:
- case __DRI_BUFFER_STENCIL:
- case __DRI_BUFFER_ACCUM:
- default:
- fprintf(stderr,
- "unhandled buffer attach event, attachment type %d\n",
- buffers[i].attachment);
- return;
- }
-
- brw_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
- }
-
-}
-
-void
-brw_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
-{
- struct brw_context *brw = context->driverPrivate;
- __DRIscreen *dri_screen = brw->screen->driScrnPriv;
-
- /* Set this up front, so that in case our buffers get invalidated
- * while we're getting new buffers, we don't clobber the stamp and
- * thus ignore the invalidate. */
- drawable->lastStamp = drawable->dri2.stamp;
-
- if (INTEL_DEBUG(DEBUG_DRI))
- fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-
- if (dri_screen->image.loader)
- brw_update_image_buffers(brw, drawable);
- else
- brw_update_dri2_buffers(brw, drawable);
-
- driUpdateFramebufferSize(&brw->ctx, drawable);
-}
-
-/**
- * brw_prepare_render() should be called anywhere that current read/drawbuffer
- * state is required.
- */
-void
-brw_prepare_render(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- __DRIcontext *driContext = brw->driContext;
- __DRIdrawable *drawable;
-
- drawable = driContext->driDrawablePriv;
- if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
- if (drawable->lastStamp != drawable->dri2.stamp)
- brw_update_renderbuffers(driContext, drawable);
- driContext->dri2.draw_stamp = drawable->dri2.stamp;
- }
-
- drawable = driContext->driReadablePriv;
- if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
- if (drawable->lastStamp != drawable->dri2.stamp)
- brw_update_renderbuffers(driContext, drawable);
- driContext->dri2.read_stamp = drawable->dri2.stamp;
- }
-
- /* If we're currently rendering to the front buffer, the rendering
- * that will happen next will probably dirty the front buffer. So
- * mark it as dirty here.
- */
- if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) &&
- ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
- brw->front_buffer_dirty = true;
- }
-
- if (brw->is_shared_buffer_bound) {
- /* Subsequent rendering will probably dirty the shared buffer. */
- brw->is_shared_buffer_dirty = true;
- }
-}
-
-/**
- * \brief Query DRI2 to obtain a DRIdrawable's buffers.
- *
- * To determine which DRI buffers to request, examine the renderbuffers
- * attached to the drawable's framebuffer. Then request the buffers with
- * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
- *
- * This is called from brw_update_renderbuffers().
- *
- * \param drawable Drawable whose buffers are queried.
- * \param buffers [out] List of buffers returned by DRI2 query.
- * \param buffer_count [out] Number of buffers returned.
- *
- * \see brw_update_renderbuffers()
- * \see DRI2GetBuffers()
- * \see DRI2GetBuffersWithFormat()
- */
-static void
-brw_query_dri2_buffers(struct brw_context *brw,
- __DRIdrawable *drawable,
- __DRIbuffer **buffers,
- int *buffer_count)
-{
- __DRIscreen *dri_screen = brw->screen->driScrnPriv;
- struct gl_framebuffer *fb = drawable->driverPrivate;
- int i = 0;
- unsigned attachments[__DRI_BUFFER_COUNT];
-
- struct brw_renderbuffer *front_rb;
- struct brw_renderbuffer *back_rb;
-
- front_rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
- back_rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT);
-
- memset(attachments, 0, sizeof(attachments));
- if ((_mesa_is_front_buffer_drawing(fb) ||
- _mesa_is_front_buffer_reading(fb) ||
- !back_rb) && front_rb) {
- /* If a fake front buffer is in use, then querying for
- * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
- * the real front buffer to the fake front buffer. So before doing the
- * query, we need to make sure all the pending drawing has landed in the
- * real front buffer.
- */
- brw_batch_flush(brw);
- brw_flush_front(&brw->ctx);
-
- attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
- attachments[i++] = brw_bits_per_pixel(front_rb);
- } else if (front_rb && brw->front_buffer_dirty) {
- /* We have pending front buffer rendering, but we aren't querying for a
- * front buffer. If the front buffer we have is a fake front buffer,
- * the X server is going to throw it away when it processes the query.
- * So before doing the query, make sure all the pending drawing has
- * landed in the real front buffer.
- */
- brw_batch_flush(brw);
- brw_flush_front(&brw->ctx);
- }
-
- if (back_rb) {
- attachments[i++] = __DRI_BUFFER_BACK_LEFT;
- attachments[i++] = brw_bits_per_pixel(back_rb);
- }
-
- assert(i <= ARRAY_SIZE(attachments));
-
- *buffers =
- dri_screen->dri2.loader->getBuffersWithFormat(drawable,
- &drawable->w,
- &drawable->h,
- attachments, i / 2,
- buffer_count,
- drawable->loaderPrivate);
-}
-
-/**
- * \brief Assign a DRI buffer's DRM region to a renderbuffer.
- *
- * This is called from brw_update_renderbuffers().
- *
- * \par Note:
- * DRI buffers whose attachment point is DRI2BufferStencil or
- * DRI2BufferDepthStencil are handled as special cases.
- *
- * \param buffer_name is a human readable name, such as "dri2 front buffer",
- * that is passed to brw_bo_gem_create_from_name().
- *
- * \see brw_update_renderbuffers()
- */
-static void
-brw_process_dri2_buffer(struct brw_context *brw,
- __DRIdrawable *drawable,
- __DRIbuffer *buffer,
- struct brw_renderbuffer *rb,
- const char *buffer_name)
-{
- struct gl_framebuffer *fb = drawable->driverPrivate;
- struct brw_bo *bo;
-
- if (!rb)
- return;
-
- unsigned num_samples = rb->Base.Base.NumSamples;
-
- /* We try to avoid closing and reopening the same BO name, because the first
- * use of a mapping of the buffer involves a bunch of page faulting which is
- * moderately expensive.
- */
- struct brw_mipmap_tree *last_mt;
- if (num_samples == 0)
- last_mt = rb->mt;
- else
- last_mt = rb->singlesample_mt;
-
- uint32_t old_name = 0;
- if (last_mt) {
- /* The bo already has a name because the miptree was created by a
- * previous call to brw_process_dri2_buffer(). If a bo already has a
- * name, then brw_bo_flink() is a low-cost getter. It does not
- * create a new name.
- */
- brw_bo_flink(last_mt->bo, &old_name);
- }
-
- if (old_name == buffer->name)
- return;
-
- if (INTEL_DEBUG(DEBUG_DRI)) {
- fprintf(stderr,
- "attaching buffer %d, at %d, cpp %d, pitch %d\n",
- buffer->name, buffer->attachment,
- buffer->cpp, buffer->pitch);
- }
-
- bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
- buffer->name);
- if (!bo) {
- fprintf(stderr,
- "Failed to open BO for returned DRI2 buffer "
- "(%dx%d, %s, named %d).\n"
- "This is likely a bug in the X Server that will lead to a "
- "crash soon.\n",
- drawable->w, drawable->h, buffer_name, buffer->name);
- return;
- }
-
- uint32_t tiling, swizzle;
- brw_bo_get_tiling(bo, &tiling, &swizzle);
-
- struct brw_mipmap_tree *mt =
- brw_miptree_create_for_bo(brw,
- bo,
- brw_rb_format(rb),
- 0,
- drawable->w,
- drawable->h,
- 1,
- buffer->pitch,
- isl_tiling_from_i915_tiling(tiling),
- MIPTREE_CREATE_DEFAULT);
- if (!mt) {
- brw_bo_unreference(bo);
- return;
- }
-
- /* We got this BO from X11. We can't assume that we have coherent texture
- * access because X may suddenly decide to use it for scan-out which would
- * destroy coherency.
- */
- bo->cache_coherent = false;
-
- if (!brw_update_winsys_renderbuffer_miptree(brw, rb, mt,
- drawable->w, drawable->h,
- buffer->pitch)) {
- brw_bo_unreference(bo);
- brw_miptree_release(&mt);
- return;
- }
-
- if (_mesa_is_front_buffer_drawing(fb) &&
- (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
- buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
- rb->Base.Base.NumSamples > 1) {
- brw_renderbuffer_upsample(brw, rb);
- }
-
- assert(rb->mt);
-
- brw_bo_unreference(bo);
-}
-
-/**
- * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
- *
- * To determine which DRI buffers to request, examine the renderbuffers
- * attached to the drawable's framebuffer. Then request the buffers from
- * the image loader.
- *
- * This is called from brw_update_renderbuffers().
- *
- * \param drawable Drawable whose buffers are queried.
- * \param buffers [out] List of buffers returned by DRI2 query.
- * \param buffer_count [out] Number of buffers returned.
- *
- * \see brw_update_renderbuffers()
- */
-
-static void
-brw_update_image_buffer(struct brw_context *intel,
- __DRIdrawable *drawable,
- struct brw_renderbuffer *rb,
- __DRIimage *buffer,
- enum __DRIimageBufferMask buffer_type)
-{
- struct gl_framebuffer *fb = drawable->driverPrivate;
-
- if (!rb || !buffer->bo)
- return;
-
- unsigned num_samples = rb->Base.Base.NumSamples;
-
- /* Check and see if we're already bound to the right
- * buffer object
- */
- struct brw_mipmap_tree *last_mt;
- if (num_samples == 0)
- last_mt = rb->mt;
- else
- last_mt = rb->singlesample_mt;
-
- if (last_mt && last_mt->bo == buffer->bo) {
- if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
- brw_miptree_make_shareable(intel, last_mt);
- }
- return;
- }
-
- /* Only allow internal compression if samples == 0. For multisampled
- * window system buffers, the only thing the single-sampled buffer is used
- * for is as a resolve target. If we do any compression beyond what is
- * supported by the window system, we will just have to resolve so it's
- * probably better to just not bother.
- */
- const bool allow_internal_aux = (num_samples == 0);
-
- struct brw_mipmap_tree *mt =
- brw_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
- brw_rb_format(rb),
- allow_internal_aux);
- if (!mt)
- return;
-
- if (!brw_update_winsys_renderbuffer_miptree(intel, rb, mt,
- buffer->width, buffer->height,
- buffer->pitch)) {
- brw_miptree_release(&mt);
- return;
- }
-
- if (_mesa_is_front_buffer_drawing(fb) &&
- buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
- rb->Base.Base.NumSamples > 1) {
- brw_renderbuffer_upsample(intel, rb);
- }
-
- if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
- /* The compositor and the application may access this image
- * concurrently. The display hardware may even scanout the image while
- * the GPU is rendering to it. Aux surfaces cause difficulty with
- * concurrent access, so permanently disable aux for this miptree.
- *
- * Perhaps we could improve overall application performance by
- * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
- * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
- * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
- * approach to be highly dependent on the application's GL usage.
- *
- * I [chadv] expect clever disabling/reenabling to be counterproductive
- * in the use cases I care about: applications that render nearly
- * realtime handwriting to the surface while possibly undergoing
- * simultaneous scanout as a display plane. The app requires low
- * render latency. Even though the app spends most of its time in
- * shared-buffer mode, it also frequently transitions between
- * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
- * mode. Visual stutter during the transitions should be avoided.
- *
- * In this case, I [chadv] believe reducing the GPU workload at
- * shared-buffer/double-buffer transitions would offer a smoother app
- * experience than any savings due to aux compression. But I've
- * collected no data to prove my theory.
- */
- brw_miptree_make_shareable(intel, mt);
- }
-}
-
-static void
-brw_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
-{
- struct gl_framebuffer *fb = drawable->driverPrivate;
- __DRIscreen *dri_screen = brw->screen->driScrnPriv;
- struct brw_renderbuffer *front_rb;
- struct brw_renderbuffer *back_rb;
- struct __DRIimageList images;
- mesa_format format;
- uint32_t buffer_mask = 0;
- int ret;
-
- front_rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
- back_rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT);
-
- if (back_rb)
- format = brw_rb_format(back_rb);
- else if (front_rb)
- format = brw_rb_format(front_rb);
- else
- return;
-
- if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
- _mesa_is_front_buffer_reading(fb) || !back_rb)) {
- buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
- }
-
- if (back_rb)
- buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
-
- ret = dri_screen->image.loader->getBuffers(drawable,
- driGLFormatToImageFormat(format),
- &drawable->dri2.stamp,
- drawable->loaderPrivate,
- buffer_mask,
- &images);
- if (!ret)
- return;
-
- if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
- drawable->w = images.front->width;
- drawable->h = images.front->height;
- brw_update_image_buffer(brw, drawable, front_rb, images.front,
- __DRI_IMAGE_BUFFER_FRONT);
- }
-
- if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
- drawable->w = images.back->width;
- drawable->h = images.back->height;
- brw_update_image_buffer(brw, drawable, back_rb, images.back,
- __DRI_IMAGE_BUFFER_BACK);
- }
-
- if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
- assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
- drawable->w = images.back->width;
- drawable->h = images.back->height;
- brw_update_image_buffer(brw, drawable, back_rb, images.back,
- __DRI_IMAGE_BUFFER_SHARED);
- brw->is_shared_buffer_bound = true;
- } else {
- brw->is_shared_buffer_bound = false;
- brw->is_shared_buffer_dirty = false;
- }
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRWCONTEXT_INC
-#define BRWCONTEXT_INC
-
-#include <stdbool.h>
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/errors.h"
-#include "brw_structs.h"
-#include "brw_pipe_control.h"
-#include "compiler/brw_compiler.h"
-
-#include "isl/isl.h"
-#include "blorp/blorp.h"
-
-#include <brw_bufmgr.h>
-
-#include "dev/intel_debug.h"
-#include "common/intel_decoder.h"
-#include "brw_screen.h"
-#include "brw_tex_obj.h"
-#include "perf/intel_perf.h"
-#include "perf/intel_perf_query.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/* Glossary:
- *
- * URB - uniform resource buffer. A mid-sized buffer which is
- * partitioned between the fixed function units and used for passing
- * values (vertices, primitives, constants) between them.
- *
- * CURBE - constant URB entry. An urb region (entry) used to hold
- * constant values which the fixed function units can be instructed to
- * preload into the GRF when spawning a thread.
- *
- * VUE - vertex URB entry. An urb entry holding a vertex and usually
- * a vertex header. The header contains control information and
- * things like primitive type, Begin/end flags and clip codes.
- *
- * PUE - primitive URB entry. An urb entry produced by the setup (SF)
- * unit holding rasterization and interpolation parameters.
- *
- * GRF - general register file. One of several register files
- * addressable by programmed threads. The inputs (r0, payload, curbe,
- * urb) of the thread are preloaded to this area before the thread is
- * spawned. The registers are individually 8 dwords wide and suitable
- * for general usage. Registers holding thread input values are not
- * special and may be overwritten.
- *
- * MRF - message register file. Threads communicate (and terminate)
- * by sending messages. Message parameters are placed in contiguous
- * MRF registers. All program output is via these messages. URB
- * entries are populated by sending a message to the shared URB
- * function containing the new data, together with a control word,
- * often an unmodified copy of R0.
- *
- * R0 - GRF register 0. Typically holds control information used when
- * sending messages to other threads.
- *
- * EU or GFX4 EU: The name of the programmable subsystem of the
- * i965 hardware. Threads are executed by the EU, the registers
- * described above are part of the EU architecture.
- *
- * Fixed function units:
- *
- * CS - Command streamer. Notional first unit, little software
- * interaction. Holds the URB entries used for constant data, ie the
- * CURBEs.
- *
- * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
- * this unit is responsible for pulling vertices out of vertex buffers
- * in vram and injecting them into the processing pipe as VUEs. If
- * enabled, it first passes them to a VS thread which is a good place
- * for the driver to implement any active vertex shader.
- *
- * HS - Hull Shader (Tessellation Control Shader)
- *
- * TE - Tessellation Engine (Tessellation Primitive Generation)
- *
- * DS - Domain Shader (Tessellation Evaluation Shader)
- *
- * GS - Geometry Shader. This corresponds to a new DX10 concept. If
- * enabled, incoming strips etc are passed to GS threads in individual
- * line/triangle/point units. The GS thread may perform arbitrary
- * computation and emit whatever primitives with whatever vertices it
- * chooses. This makes GS an excellent place to implement GL's
- * unfilled polygon modes, though of course it is capable of much
- * more. Additionally, GS is used to translate away primitives not
- * handled by latter units, including Quads and Lineloops.
- *
- * CS - Clipper. Mesa's clipping algorithms are imported to run on
- * this unit. The fixed function part performs cliptesting against
- * the 6 fixed clipplanes and makes decisions on whether or not the
- * incoming primitive needs to be passed to a thread for clipping.
- * User clip planes are handled via cooperation with the VS thread.
- *
- * SF - Strips Fans or Setup: Triangles are prepared for
- * rasterization. Interpolation coefficients are calculated.
- * Flatshading and two-sided lighting usually performed here.
- *
- * WM - Windower. Interpolation of vertex attributes performed here.
- * Fragment shader implemented here. SIMD aspects of EU taken full
- * advantage of, as pixels are processed in blocks of 16.
- *
- * CC - Color Calculator. No EU threads associated with this unit.
- * Handles blending and (presumably) depth and stencil testing.
- */
-
-struct brw_context;
-struct brw_inst;
-struct brw_vs_prog_key;
-struct brw_vue_prog_key;
-struct brw_wm_prog_key;
-struct brw_wm_prog_data;
-struct brw_cs_prog_key;
-struct brw_cs_prog_data;
-struct brw_label;
-
-enum brw_pipeline {
- BRW_RENDER_PIPELINE,
- BRW_COMPUTE_PIPELINE,
-
- BRW_NUM_PIPELINES
-};
-
-enum brw_cache_id {
- BRW_CACHE_FS_PROG,
- BRW_CACHE_BLORP_PROG,
- BRW_CACHE_SF_PROG,
- BRW_CACHE_VS_PROG,
- BRW_CACHE_FF_GS_PROG,
- BRW_CACHE_GS_PROG,
- BRW_CACHE_TCS_PROG,
- BRW_CACHE_TES_PROG,
- BRW_CACHE_CLIP_PROG,
- BRW_CACHE_CS_PROG,
-
- BRW_MAX_CACHE
-};
-
-enum gfx9_astc5x5_wa_tex_type {
- GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
- GFX9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1,
-};
-
-enum brw_state_id {
- /* brw_cache_ids must come first - see brw_program_cache.c */
- BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
- BRW_STATE_FRAGMENT_PROGRAM,
- BRW_STATE_GEOMETRY_PROGRAM,
- BRW_STATE_TESS_PROGRAMS,
- BRW_STATE_VERTEX_PROGRAM,
- BRW_STATE_REDUCED_PRIMITIVE,
- BRW_STATE_PATCH_PRIMITIVE,
- BRW_STATE_PRIMITIVE,
- BRW_STATE_CONTEXT,
- BRW_STATE_PSP,
- BRW_STATE_SURFACES,
- BRW_STATE_BINDING_TABLE_POINTERS,
- BRW_STATE_INDICES,
- BRW_STATE_VERTICES,
- BRW_STATE_DEFAULT_TESS_LEVELS,
- BRW_STATE_BATCH,
- BRW_STATE_INDEX_BUFFER,
- BRW_STATE_VS_CONSTBUF,
- BRW_STATE_TCS_CONSTBUF,
- BRW_STATE_TES_CONSTBUF,
- BRW_STATE_GS_CONSTBUF,
- BRW_STATE_PROGRAM_CACHE,
- BRW_STATE_STATE_BASE_ADDRESS,
- BRW_STATE_VUE_MAP_GEOM_OUT,
- BRW_STATE_TRANSFORM_FEEDBACK,
- BRW_STATE_RASTERIZER_DISCARD,
- BRW_STATE_STATS_WM,
- BRW_STATE_UNIFORM_BUFFER,
- BRW_STATE_IMAGE_UNITS,
- BRW_STATE_META_IN_PROGRESS,
- BRW_STATE_PUSH_CONSTANT_ALLOCATION,
- BRW_STATE_NUM_SAMPLES,
- BRW_STATE_TEXTURE_BUFFER,
- BRW_STATE_GFX4_UNIT_STATE,
- BRW_STATE_CC_VP,
- BRW_STATE_SF_VP,
- BRW_STATE_CLIP_VP,
- BRW_STATE_SAMPLER_STATE_TABLE,
- BRW_STATE_VS_ATTRIB_WORKAROUNDS,
- BRW_STATE_COMPUTE_PROGRAM,
- BRW_STATE_CS_WORK_GROUPS,
- BRW_STATE_URB_SIZE,
- BRW_STATE_CC_STATE,
- BRW_STATE_BLORP,
- BRW_STATE_VIEWPORT_COUNT,
- BRW_STATE_CONSERVATIVE_RASTERIZATION,
- BRW_STATE_DRAW_CALL,
- BRW_STATE_AUX,
- BRW_NUM_STATE_BITS
-};
-
-/**
- * BRW_NEW_*_PROG_DATA and BRW_NEW_*_PROGRAM are similar, but distinct.
- *
- * BRW_NEW_*_PROGRAM relates to the gl_shader_program/gl_program structures.
- * When the currently bound shader program differs from the previous draw
- * call, these will be flagged. They cover brw->{stage}_program and
- * ctx->{Stage}Program->_Current.
- *
- * BRW_NEW_*_PROG_DATA is flagged when the effective shaders change, from a
- * driver perspective. Even if the same shader is bound at the API level,
- * we may need to switch between multiple versions of that shader to handle
- * changes in non-orthogonal state.
- *
- * Additionally, multiple shader programs may have identical vertex shaders
- * (for example), or compile down to the same code in the backend. We combine
- * those into a single program cache entry.
- *
- * BRW_NEW_*_PROG_DATA occurs when switching program cache entries, which
- * covers the brw_*_prog_data structures, and brw->*.prog_offset.
- */
-#define BRW_NEW_FS_PROG_DATA (1ull << BRW_CACHE_FS_PROG)
-/* XXX: The BRW_NEW_BLORP_BLIT_PROG_DATA dirty bit is unused (as BLORP doesn't
- * use the normal state upload paths), but the cache is still used. To avoid
- * polluting the brw_program_cache code with special cases, we retain the
- * dirty bit for now. It should eventually be removed.
- */
-#define BRW_NEW_BLORP_BLIT_PROG_DATA (1ull << BRW_CACHE_BLORP_PROG)
-#define BRW_NEW_SF_PROG_DATA (1ull << BRW_CACHE_SF_PROG)
-#define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG)
-#define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG)
-#define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG)
-#define BRW_NEW_TCS_PROG_DATA (1ull << BRW_CACHE_TCS_PROG)
-#define BRW_NEW_TES_PROG_DATA (1ull << BRW_CACHE_TES_PROG)
-#define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG)
-#define BRW_NEW_CS_PROG_DATA (1ull << BRW_CACHE_CS_PROG)
-#define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE)
-#define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM)
-#define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM)
-#define BRW_NEW_TESS_PROGRAMS (1ull << BRW_STATE_TESS_PROGRAMS)
-#define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM)
-#define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE)
-#define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
-#define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE)
-#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT)
-#define BRW_NEW_PSP (1ull << BRW_STATE_PSP)
-#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
-#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES)
-#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES)
-#define BRW_NEW_DEFAULT_TESS_LEVELS (1ull << BRW_STATE_DEFAULT_TESS_LEVELS)
-/**
- * Used for any batch entry with a relocated pointer that will be used
- * by any 3D rendering.
- */
-#define BRW_NEW_BATCH (1ull << BRW_STATE_BATCH)
-/** \see brw.state.depth_region */
-#define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER)
-#define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF)
-#define BRW_NEW_TCS_CONSTBUF (1ull << BRW_STATE_TCS_CONSTBUF)
-#define BRW_NEW_TES_CONSTBUF (1ull << BRW_STATE_TES_CONSTBUF)
-#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF)
-#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE)
-#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
-#define BRW_NEW_VIEWPORT_COUNT (1ull << BRW_STATE_VIEWPORT_COUNT)
-#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
-#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD)
-#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM)
-#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER)
-#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS)
-#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS)
-#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
-#define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES)
-#define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER)
-#define BRW_NEW_GFX4_UNIT_STATE (1ull << BRW_STATE_GFX4_UNIT_STATE)
-#define BRW_NEW_CC_VP (1ull << BRW_STATE_CC_VP)
-#define BRW_NEW_SF_VP (1ull << BRW_STATE_SF_VP)
-#define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP)
-#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
-#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
-#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
-#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS)
-#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE)
-#define BRW_NEW_CC_STATE (1ull << BRW_STATE_CC_STATE)
-#define BRW_NEW_BLORP (1ull << BRW_STATE_BLORP)
-#define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << BRW_STATE_CONSERVATIVE_RASTERIZATION)
-#define BRW_NEW_DRAW_CALL (1ull << BRW_STATE_DRAW_CALL)
-#define BRW_NEW_AUX_STATE (1ull << BRW_STATE_AUX)
-
-struct brw_state_flags {
- /** State update flags signalled by mesa internals */
- GLuint mesa;
- /**
- * State update flags signalled as the result of brw_tracked_state updates
- */
- uint64_t brw;
-};
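For reference while reading the flag definitions above, a minimal sketch of how a driver path raises one of these bits. The helper name is hypothetical; the sketch assumes the driver's usual convention of accumulating BRW_NEW_* bits in ctx.NewDriverState (core Mesa _NEW_* bits land in ctx->NewState) until the next state-upload pass folds them into brw->state.pipelines[].

static void
example_flag_new_fragment_program(struct brw_context *brw)
{
   /* Driver-internal dirty bits (the 64-bit `brw` field above) accumulate
    * in ctx.NewDriverState; the state-upload loop compares them against
    * each atom's dirty mask before calling its emit function. */
   brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
}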
-
-
-/** Subclass of Mesa program */
-struct brw_program {
- struct gl_program program;
- GLuint id;
-
- bool compiled_once;
-};
-
-/** Number of texture sampler units */
-#define BRW_MAX_TEX_UNIT 32
-
-/** Max number of UBOs in a shader */
-#define BRW_MAX_UBO 14
-
-/** Max number of SSBOs in a shader */
-#define BRW_MAX_SSBO 12
-
-/** Max number of atomic counter buffer objects in a shader */
-#define BRW_MAX_ABO 16
-
-/** Max number of image uniforms in a shader */
-#define BRW_MAX_IMAGES 32
-
-/** Maximum number of actual buffers used for stream output */
-#define BRW_MAX_SOL_BUFFERS 4
-
-#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \
- BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
- BRW_MAX_UBO + \
- BRW_MAX_SSBO + \
- BRW_MAX_ABO + \
- BRW_MAX_IMAGES + \
- 2 + /* shader time, pull constants */ \
- 1 /* cs num work groups */)
-
-struct brw_cache {
- struct brw_context *brw;
-
- struct brw_cache_item **items;
- struct brw_bo *bo;
- void *map;
- GLuint size, n_items;
-
- uint32_t next_offset;
-};
-
-#define perf_debug(...) do { \
- static GLuint msg_id = 0; \
- if (INTEL_DEBUG(DEBUG_PERF)) \
- dbg_printf(__VA_ARGS__); \
- if (brw->perf_debug) \
- _mesa_gl_debugf(&brw->ctx, &msg_id, \
- MESA_DEBUG_SOURCE_API, \
- MESA_DEBUG_TYPE_PERFORMANCE, \
- MESA_DEBUG_SEVERITY_MEDIUM, \
- __VA_ARGS__); \
-} while(0)
-
-#define WARN_ONCE(cond, fmt...) do { \
- if (unlikely(cond)) { \
- static bool _warned = false; \
- static GLuint msg_id = 0; \
- if (!_warned) { \
- fprintf(stderr, "WARNING: "); \
- fprintf(stderr, fmt); \
- _warned = true; \
- \
- _mesa_gl_debugf(ctx, &msg_id, \
- MESA_DEBUG_SOURCE_API, \
- MESA_DEBUG_TYPE_OTHER, \
- MESA_DEBUG_SEVERITY_HIGH, fmt); \
- } \
- } \
-} while (0)
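A hedged usage sketch for the two macros above; the call site and messages are hypothetical. Both macros rely on a local `brw`/`ctx` being in scope, which is why they are only usable from functions that already hold a context pointer.

static void
example_map_busy_bo(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* Rate-limited performance complaint; also forwarded to KHR_debug
    * consumers when the context asked for performance messages. */
   perf_debug("Stalling on the GPU to map a busy BO\n");

   /* One-shot warning: printed (and sent to KHR_debug via `ctx`) at most
    * once per process for this call site. */
   WARN_ONCE(!brw->has_separate_stencil,
             "Combined depth/stencil path in use\n");
}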
-
-/* Considered adding a member to this struct to document which flags
- * an update might raise so that ordering of the state atoms can be
- * checked or derived at runtime. Dropped the idea in favor of having
- * a debug mode where the state is monitored for flags which are
- * raised that have already been tested against.
- */
-struct brw_tracked_state {
- struct brw_state_flags dirty;
- void (*emit)( struct brw_context *brw );
-};
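A minimal sketch of how an atom is declared against this struct; the atom and its emit function are illustrative rather than taken from the tree, but the .dirty/.emit shape matches the render_atoms/compute_atoms tables referenced later in this header.

static void
upload_example_state(struct brw_context *brw)
{
   /* Emit whatever packets this piece of state requires. */
}

static const struct brw_tracked_state example_atom = {
   .dirty = {
      .mesa = _NEW_COLOR,                      /* core Mesa dirty bit */
      .brw  = BRW_NEW_CONTEXT | BRW_NEW_BLORP, /* driver bits defined above */
   },
   .emit = upload_example_state,
};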
-
-enum shader_time_shader_type {
- ST_NONE,
- ST_VS,
- ST_TCS,
- ST_TES,
- ST_GS,
- ST_FS8,
- ST_FS16,
- ST_FS32,
- ST_CS,
-};
-
-struct brw_vertex_buffer {
- /** Buffer object containing the uploaded vertex data */
- struct brw_bo *bo;
- uint32_t offset;
- uint32_t size;
- /** Byte stride between elements in the uploaded array */
- GLuint stride;
- GLuint step_rate;
-};
-struct brw_vertex_element {
- const struct gl_vertex_format *glformat;
-
- int buffer;
- bool is_dual_slot;
- /** Offset of the first element within the buffer object */
- unsigned int offset;
-};
-
-struct brw_query_object {
- struct gl_query_object Base;
-
- /** Last query BO associated with this query. */
- struct brw_bo *bo;
-
- /** Last index in bo with query data for this object. */
- int last_index;
-
- /** True if we know the batch has been flushed since we ended the query. */
- bool flushed;
-};
-
-struct brw_reloc_list {
- struct drm_i915_gem_relocation_entry *relocs;
- int reloc_count;
- int reloc_array_size;
-};
-
-struct brw_growing_bo {
- struct brw_bo *bo;
- uint32_t *map;
- struct brw_bo *partial_bo;
- uint32_t *partial_bo_map;
- unsigned partial_bytes;
- enum brw_memory_zone memzone;
-};
-
-struct brw_batch {
- /** Current batchbuffer being queued up. */
- struct brw_growing_bo batch;
- /** Current statebuffer being queued up. */
- struct brw_growing_bo state;
-
- /** Last batchbuffer submitted to the hardware. Used for glFinish(). */
- struct brw_bo *last_bo;
-
-#ifdef DEBUG
- uint16_t emit, total;
-#endif
- uint32_t *map_next;
- uint32_t state_used;
-
- bool use_shadow_copy;
- bool use_batch_first;
- bool needs_sol_reset;
- bool state_base_address_emitted;
- bool no_wrap;
- bool contains_fence_signal;
-
- struct brw_reloc_list batch_relocs;
- struct brw_reloc_list state_relocs;
- unsigned int valid_reloc_flags;
-
- /** The validation list */
- struct drm_i915_gem_exec_object2 *validation_list;
- struct brw_bo **exec_bos;
- int exec_count;
- int exec_array_size;
-
- /** The amount of aperture space (in bytes) used by all exec_bos */
- uint64_t aperture_space;
-
- struct {
- uint32_t *map_next;
- int batch_reloc_count;
- int state_reloc_count;
- int exec_count;
- } saved;
-
- /** Map from batch offset to brw_state_batch data (with DEBUG_BATCH) */
- struct hash_table_u64 *state_batch_sizes;
-
- struct intel_batch_decode_ctx decoder;
-
- /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
- struct util_dynarray exec_fences;
-};
-
-#define BRW_MAX_XFB_STREAMS 4
-
-struct brw_transform_feedback_counter {
- /**
- * Index of the first entry of this counter within the primitive count BO.
- * An entry is considered to be an N-tuple of 64bit values, where N is the
- * number of vertex streams supported by the platform.
- */
- unsigned bo_start;
-
- /**
- * Index one past the last entry of this counter within the primitive
- * count BO.
- */
- unsigned bo_end;
-
- /**
- * Primitive count values accumulated while this counter was active,
- * excluding any entries buffered between \c bo_start and \c bo_end, which
- * haven't been accounted for yet.
- */
- uint64_t accum[BRW_MAX_XFB_STREAMS];
-};
-
-static inline void
-brw_reset_transform_feedback_counter(
- struct brw_transform_feedback_counter *counter)
-{
- counter->bo_start = counter->bo_end;
- memset(&counter->accum, 0, sizeof(counter->accum));
-}
-
-struct brw_transform_feedback_object {
- struct gl_transform_feedback_object base;
-
- /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */
- struct brw_bo *offset_bo;
-
- /** If true, SO_WRITE_OFFSET(n) should be reset to zero at next use. */
- bool zero_offsets;
-
- /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */
- GLenum primitive_mode;
-
- /**
- * The maximum number of vertices that we can write without overflowing
- * any of the buffers currently being used for transform feedback.
- */
- unsigned max_index;
-
- struct brw_bo *prim_count_bo;
-
- /**
- * Count of primitives generated during this transform feedback operation.
- */
- struct brw_transform_feedback_counter counter;
-
- /**
- * Count of primitives generated during the previous transform feedback
- * operation. Used to implement DrawTransformFeedback().
- */
- struct brw_transform_feedback_counter previous_counter;
-
- /**
- * Number of vertices written between last Begin/EndTransformFeedback().
- *
- * Used to implement DrawTransformFeedback().
- */
- uint64_t vertices_written[BRW_MAX_XFB_STREAMS];
- bool vertices_written_valid;
-};
-
-/**
- * Data shared between each programmable stage in the pipeline (vs, gs, and
- * wm).
- */
-struct brw_stage_state
-{
- gl_shader_stage stage;
- struct brw_stage_prog_data *prog_data;
-
- /**
- * Optional scratch buffer used to store spilled register values and
- * variably-indexed GRF arrays.
- *
- * The contents of this buffer are short-lived so the same memory can be
- * re-used at will for multiple shader programs (executed by the same fixed
- * function). However reusing a scratch BO for which shader invocations
- * are still in flight with a per-thread scratch slot size other than the
- * original can cause threads with different scratch slot size and FFTID
- * (which may be executed in parallel depending on the shader stage and
- * hardware generation) to map to an overlapping region of the scratch
- * space, which can potentially lead to mutual scratch space corruption.
- * For that reason if you borrow this scratch buffer you should only be
- * using the slot size given by the \c per_thread_scratch member below,
- * unless you're taking additional measures to synchronize thread execution
- * across slot size changes.
- */
- struct brw_bo *scratch_bo;
-
- /**
- * Scratch slot size allocated for each thread in the buffer object given
- * by \c scratch_bo.
- */
- uint32_t per_thread_scratch;
-
- /** Offset in the program cache to the program */
- uint32_t prog_offset;
-
- /** Offset in the batchbuffer to Gfx4-5 pipelined state (VS/WM/GS_STATE). */
- uint32_t state_offset;
-
- struct brw_bo *push_const_bo; /* NULL if using the batchbuffer */
- uint32_t push_const_offset; /* Offset in the push constant BO or batch */
- int push_const_size; /* in 256-bit register increments */
-
- /* Binding table: pointers to SURFACE_STATE entries. */
- uint32_t bind_bo_offset;
- uint32_t surf_offset[BRW_MAX_SURFACES];
-
- /** SAMPLER_STATE count and table offset */
- uint32_t sampler_count;
- uint32_t sampler_offset;
-
- struct brw_image_param image_param[BRW_MAX_IMAGES];
-
- /** Need to re-emit 3DSTATE_CONSTANT_XS? */
- bool push_constants_dirty;
-};
-
-enum brw_predicate_state {
- /* The first two states are used if we can determine whether to draw
- * without having to look at the values in the query object buffer. This
- * will happen if there is no conditional render in progress, if the query
- * object is already completed or if something else has already added
- * samples to the preliminary result such as via a BLT command.
- */
- BRW_PREDICATE_STATE_RENDER,
- BRW_PREDICATE_STATE_DONT_RENDER,
- /* In this case whether to draw or not depends on the result of an
- * MI_PREDICATE command so the predicate enable bit needs to be checked.
- */
- BRW_PREDICATE_STATE_USE_BIT,
- /* In this case, either MI_PREDICATE doesn't exist or we lack the
- * necessary kernel features to use it. Stall for the query result.
- */
- BRW_PREDICATE_STATE_STALL_FOR_QUERY,
-};
-
-struct shader_times;
-
-struct intel_l3_config;
-struct intel_perf;
-
-struct brw_uploader {
- struct brw_bufmgr *bufmgr;
- struct brw_bo *bo;
- void *map;
- uint32_t next_offset;
- unsigned default_size;
-};
-
-/**
- * brw_context is derived from gl_context.
- */
-struct brw_context
-{
- struct gl_context ctx; /**< base class, must be first field */
-
- struct
- {
- /**
- * Emit an MI_REPORT_PERF_COUNT command packet.
- *
- * This asks the GPU to write a report of the current OA counter values
- * into @bo at the given offset and containing the given @report_id
- * which we can cross-reference when parsing the report (gfx7+ only).
- */
- void (*emit_mi_report_perf_count)(struct brw_context *brw,
- struct brw_bo *bo,
- uint32_t offset_in_bytes,
- uint32_t report_id);
-
- void (*emit_compute_walker)(struct brw_context *brw);
- void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
- void (*emit_state_base_address)(struct brw_context *brw);
- } vtbl;
-
- struct brw_bufmgr *bufmgr;
-
- uint32_t hw_ctx;
-
- /**
- * BO for post-sync nonzero writes for gfx6 workaround.
- *
- * This buffer also contains a marker + description of the driver. This
- * buffer is added to all execbufs syscalls so that we can identify the
- * driver that generated a hang by looking at the content of the buffer in
- * the error state.
- *
- * Read/write should go at workaround_bo_offset in that buffer to avoid
- * overriding the debug data.
- */
- struct brw_bo *workaround_bo;
- uint32_t workaround_bo_offset;
- uint8_t pipe_controls_since_last_cs_stall;
-
- /**
- * Set of struct brw_bo * that have been rendered to within this batchbuffer
- * and would need flushing before being used from another cache domain that
- * isn't coherent with it (i.e. the sampler).
- */
- struct hash_table *render_cache;
-
- /**
- * Set of struct brw_bo * that have been used as a depth buffer within this
- * batchbuffer and would need flushing before being used from another cache
- * domain that isn't coherent with it (i.e. the sampler).
- */
- struct set *depth_cache;
-
- /**
- * Number of resets observed in the system at context creation.
- *
- * This is tracked in the context so that we can determine that another
- * reset has occurred.
- */
- uint32_t reset_count;
-
- struct brw_batch batch;
-
- struct brw_uploader upload;
-
- /**
- * Set if rendering has occurred to the drawable's front buffer.
- *
- * This is used in the DRI2 case to detect that glFlush should also copy
- * the contents of the fake front buffer to the real front buffer.
- */
- bool front_buffer_dirty;
-
- /**
- * True if the __DRIdrawable's current __DRIimageBufferMask is
- * __DRI_IMAGE_BUFFER_SHARED.
- */
- bool is_shared_buffer_bound;
-
- /**
- * True if a shared buffer is bound and it has received any rendering since
- * the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer().
- */
- bool is_shared_buffer_dirty;
-
- /** Framerate throttling: @{ */
- struct brw_bo *throttle_batch[2];
-
- /* Limit the number of outstanding SwapBuffers by waiting for an earlier
- * frame of rendering to complete. This gives a very precise cap to the
- * latency between input and output such that rendering never gets more
- * than a frame behind the user. (With the caveat that we technically are
- * not using the SwapBuffers itself as a barrier but the first batch
- * submitted afterwards, which may be immediately prior to the next
- * SwapBuffers.)
- */
- bool need_swap_throttle;
-
- /** General throttling, not caught by throttling between SwapBuffers */
- bool need_flush_throttle;
- /** @} */
-
- GLuint stats_wm;
-
- /**
- * drirc options:
- * @{
- */
- bool always_flush_batch;
- bool always_flush_cache;
- bool disable_throttling;
- bool precompile;
- bool dual_color_blend_by_location;
- /** @} */
-
- GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
-
- bool object_preemption; /**< Object level preemption enabled. */
-
- GLenum reduced_primitive;
-
- /**
- * Set if we're either a debug context or the INTEL_DEBUG=perf environment
- * variable is set; this flag indicates that we should do the expensive
- * work that might lead to a perf_debug() call.
- */
- bool perf_debug;
-
- uint64_t max_gtt_map_object_size;
-
- bool has_hiz;
- bool has_separate_stencil;
-
- bool can_push_ubos;
-
- /** Derived stencil states. */
- bool stencil_enabled;
- bool stencil_two_sided;
- bool stencil_write_enabled;
- /** Derived polygon state. */
- bool polygon_front_bit; /**< 0=GL_CCW, 1=GL_CW */
-
- struct isl_device isl_dev;
-
- struct blorp_context blorp;
-
- GLuint NewGLState;
- struct {
- struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
- } state;
-
- enum brw_pipeline last_pipeline;
-
- struct brw_cache cache;
-
- /* Whether a meta-operation is in progress. */
- bool meta_in_progress;
-
- /* Whether the last depth/stencil packets were both NULL. */
- bool no_depth_or_stencil;
-
- /* The last PMA stall bits programmed. */
- uint32_t pma_stall_bits;
-
- /* Whether INTEL_black_render is active. */
- bool frontend_noop;
-
- struct {
- struct {
- /**
- * Either the value of gl_BaseVertex for indexed draw calls or the
- * value of the argument <first> for non-indexed draw calls for the
- * current _mesa_prim.
- */
- int firstvertex;
-
- /** The value of gl_BaseInstance for the current _mesa_prim. */
- int gl_baseinstance;
- } params;
-
- /**
- * Buffer and offset used for GL_ARB_shader_draw_parameters which will
- * point to the indirect buffer for indirect draw calls.
- */
- struct brw_bo *draw_params_bo;
- uint32_t draw_params_offset;
-
- struct {
- /**
- * The value of gl_DrawID for the current _mesa_prim. This always comes
- * in from its own vertex buffer since it's not part of the indirect
- * draw parameters.
- */
- int gl_drawid;
-
- /**
- * Stores whether the current _mesa_prim is an indexed or non-indexed draw
- * (~0/0). Useful to calculate gl_BaseVertex as an AND of firstvertex
- * and is_indexed_draw.
- */
- int is_indexed_draw;
- } derived_params;
-
- /**
- * Buffer and offset used for GL_ARB_shader_draw_parameters which contains
- * parameters that are not present in the indirect buffer. They will go in
- * their own vertex element.
- */
- struct brw_bo *derived_draw_params_bo;
- uint32_t derived_draw_params_offset;
-
- /**
- * Pointer to the buffer storing the indirect draw parameters. It
- * currently only stores the number of requested draw calls but more
- * parameters could potentially be added.
- */
- struct brw_bo *draw_params_count_bo;
- uint32_t draw_params_count_offset;
-
- /**
- * Draw indirect buffer.
- */
- unsigned draw_indirect_stride;
- GLsizeiptr draw_indirect_offset;
- struct gl_buffer_object *draw_indirect_data;
- } draw;
-
- struct {
- /**
- * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then it is
- * an indirect call, and num_work_groups_offset is valid. Otherwise,
- * num_work_groups is set based on glDispatchCompute.
- */
- struct brw_bo *num_work_groups_bo;
- GLintptr num_work_groups_offset;
- const GLuint *num_work_groups;
- /**
- * This is only used alongside ARB_compute_variable_group_size when the
- * local work group size is variable, otherwise it's NULL.
- */
- const GLuint *group_size;
- } compute;
-
- struct {
- struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
- struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
-
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
- GLuint nr_enabled;
- GLuint nr_buffers;
-
- /* Summary of size and varying of active arrays, so we can check
- * for changes to this state:
- */
- bool index_bounds_valid;
- unsigned int min_index, max_index;
-
- /* Offset from start of vertex buffer so we can avoid redefining
- * the same VB packed over and over again.
- */
- unsigned int start_vertex_bias;
-
- /**
- * Certain vertex attribute formats aren't natively handled by the
- * hardware and require special VS code to fix up their values.
- *
- * These bitfields indicate which workarounds are needed.
- */
- uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
-
- /* High bits of the last seen vertex buffer address (for workarounds). */
- uint16_t last_bo_high_bits[33];
- } vb;
-
- struct {
- /**
- * Index buffer for this draw_prims call.
- *
- * Updates are signaled by BRW_NEW_INDICES.
- */
- const struct _mesa_index_buffer *ib;
-
- /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
- struct brw_bo *bo;
- uint32_t size;
- unsigned index_size;
-
- /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
- * avoid re-uploading the IB packet over and over if we're actually
- * referencing the same index buffer.
- */
- unsigned int start_vertex_offset;
-
- /* High bits of the last seen index buffer address (for workarounds). */
- uint16_t last_bo_high_bits;
-
- /* Used to track whether the GPU state for primitive restart is up to date */
- bool enable_cut_index;
- } ib;
-
- /* Active vertex program:
- */
- struct gl_program *programs[MESA_SHADER_STAGES];
-
- /**
- * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
- * that we don't have to reemit that state every time we change FBOs.
- */
- unsigned int num_samples;
-
- /* BRW_NEW_URB_ALLOCATIONS:
- */
- struct {
- GLuint vsize; /* vertex size plus header in urb registers */
- GLuint gsize; /* GS output size in urb registers */
- GLuint hsize; /* Tessellation control output size in urb registers */
- GLuint dsize; /* Tessellation evaluation output size in urb registers */
- GLuint csize; /* constant buffer size in urb registers */
- GLuint sfsize; /* setup data size in urb registers */
-
- bool constrained;
-
- GLuint nr_vs_entries;
- GLuint nr_hs_entries;
- GLuint nr_ds_entries;
- GLuint nr_gs_entries;
- GLuint nr_clip_entries;
- GLuint nr_sf_entries;
- GLuint nr_cs_entries;
-
- GLuint vs_start;
- GLuint hs_start;
- GLuint ds_start;
- GLuint gs_start;
- GLuint clip_start;
- GLuint sf_start;
- GLuint cs_start;
- /**
- * URB size in the current configuration. The units this is expressed
- * in are somewhat inconsistent, see intel_device_info::urb::size.
- *
- * FINISHME: Represent the URB size consistently in KB on all platforms.
- */
- GLuint size;
-
- /* True if the most recently sent _3DSTATE_URB message allocated
- * URB space for the GS.
- */
- bool gs_present;
-
- /* True if the most recently sent _3DSTATE_URB message allocated
- * URB space for the HS and DS.
- */
- bool tess_present;
- } urb;
-
-
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- struct {
- GLuint wm_start; /**< pos of first wm const in CURBE buffer */
- GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
- GLuint clip_start;
- GLuint clip_size;
- GLuint vs_start;
- GLuint vs_size;
- GLuint total_size;
-
- /**
- * Pointer to the (intel_upload.c-generated) BO containing the uniforms
- * for upload to the CURBE.
- */
- struct brw_bo *curbe_bo;
- /** Offset within curbe_bo of space for current curbe entry */
- GLuint curbe_offset;
- } curbe;
-
- /**
- * Layout of vertex data exiting the geometry portion of the pipeline.
- * This comes from the last enabled shader stage (GS, DS, or VS).
- *
- * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
- */
- struct brw_vue_map vue_map_geom_out;
-
- struct {
- struct brw_stage_state base;
- } vs;
-
- struct {
- struct brw_stage_state base;
- } tcs;
-
- struct {
- struct brw_stage_state base;
- } tes;
-
- struct {
- struct brw_stage_state base;
-
- /**
- * True if the 3DSTATE_GS command most recently emitted to the 3D
- * pipeline enabled the GS; false otherwise.
- */
- bool enabled;
- } gs;
-
- struct {
- struct brw_ff_gs_prog_data *prog_data;
-
- bool prog_active;
- /** Offset in the program cache to the CLIP program pre-gfx6 */
- uint32_t prog_offset;
- uint32_t state_offset;
-
- uint32_t bind_bo_offset;
- /**
- * Surface offsets for the binding table. We only need surfaces to
- * implement transform feedback so BRW_MAX_SOL_BINDINGS is all that we
- * need in this case.
- */
- uint32_t surf_offset[BRW_MAX_SOL_BINDINGS];
- } ff_gs;
-
- struct {
- struct brw_clip_prog_data *prog_data;
-
- /** Offset in the program cache to the CLIP program pre-gfx6 */
- uint32_t prog_offset;
-
- /* Offset in the batch to the CLIP state on pre-gfx6. */
- uint32_t state_offset;
-
- /* As of gfx6, this is the offset in the batch to the CLIP VP,
- * instead of vp_bo.
- */
- uint32_t vp_offset;
-
- /**
- * The number of viewports to use. If gl_ViewportIndex is written,
- * we can have up to ctx->Const.MaxViewports viewports. If not,
- * the viewport index is always 0, so we can only emit one.
- */
- uint8_t viewport_count;
- } clip;
-
-
- struct {
- struct brw_sf_prog_data *prog_data;
-
- /** Offset in the program cache to the CLIP program pre-gfx6 */
- uint32_t prog_offset;
- uint32_t state_offset;
- uint32_t vp_offset;
- } sf;
-
- struct {
- struct brw_stage_state base;
-
- /**
- * Buffer object used in place of multisampled null render targets on
- * Gfx6. See brw_emit_null_surface_state().
- */
- struct brw_bo *multisampled_null_render_target_bo;
-
- float offset_clamp;
- } wm;
-
- struct {
- struct brw_stage_state base;
- } cs;
-
- struct {
- uint32_t state_offset;
- uint32_t blend_state_offset;
- uint32_t depth_stencil_state_offset;
- uint32_t vp_offset;
- } cc;
-
- struct {
- struct brw_query_object *obj;
- bool begin_emitted;
- } query;
-
- struct {
- enum brw_predicate_state state;
- bool supported;
- } predicate;
-
- struct intel_perf_context *perf_ctx;
-
- int num_atoms[BRW_NUM_PIPELINES];
- const struct brw_tracked_state render_atoms[76];
- const struct brw_tracked_state compute_atoms[11];
-
- const enum isl_format *mesa_to_isl_render_format;
- const bool *mesa_format_supports_render;
-
- /* PrimitiveRestart */
- struct {
- bool in_progress;
- bool enable_cut_index;
- unsigned restart_index;
- } prim_restart;
-
- /** Computed depth/stencil/hiz state from the current attached
- * renderbuffers, valid only during the drawing state upload loop after
- * brw_workaround_depthstencil_alignment().
- */
- struct {
- /* Inter-tile (page-aligned) byte offsets. */
- uint32_t depth_offset;
- /* Intra-tile x,y offsets for drawing to combined depth-stencil. Only
- * used for Gen < 6.
- */
- uint32_t tile_x, tile_y;
- } depthstencil;
-
- uint32_t num_instances;
- int basevertex;
- int baseinstance;
-
- struct {
- const struct intel_l3_config *config;
- } l3;
-
- struct {
- struct brw_bo *bo;
- const char **names;
- int *ids;
- enum shader_time_shader_type *types;
- struct shader_times *cumulative;
- int num_entries;
- int max_entries;
- double report_time;
- } shader_time;
-
- struct brw_fast_clear_state *fast_clear_state;
-
- /* Array of aux usages to use for drawing. Aux usage for render targets is
- * a bit more complex than simply calling a single function, so we need some
- * way of passing it from brw_draw.c to surface state setup.
- */
- enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
-
- enum gfx9_astc5x5_wa_tex_type gfx9_astc5x5_wa_tex_mask;
-
- /** Last rendering scale argument provided to brw_emit_hashing_mode(). */
- unsigned current_hash_scale;
-
- __DRIcontext *driContext;
- struct brw_screen *screen;
- void *mem_ctx;
-};
-
-/* brw_clear.c */
-extern void brw_init_clear_functions(struct dd_function_table *functions);
-
-/*======================================================================
- * brw_context.c
- */
-extern const char *const brw_vendor_string;
-
-extern const char *
-brw_get_renderer_string(const struct brw_screen *screen);
-
-enum {
- DRI_CONF_BO_REUSE_DISABLED,
- DRI_CONF_BO_REUSE_ALL
-};
-
-void brw_update_renderbuffers(__DRIcontext *context,
- __DRIdrawable *drawable);
-void brw_prepare_render(struct brw_context *brw);
-
-void gfx9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
- mesa_format format,
- enum isl_aux_usage aux_usage);
-
-void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
- bool *draw_aux_buffer_disabled);
-
-void brw_resolve_for_dri2_flush(struct brw_context *brw,
- __DRIdrawable *drawable);
-
-GLboolean brw_create_context(gl_api api,
- const struct gl_config *mesaVis,
- __DRIcontext *driContextPriv,
- const struct __DriverContextConfig *ctx_config,
- unsigned *error,
- void *sharedContextPrivate);
-
-/*======================================================================
- * brw_misc_state.c
- */
-void brw_workaround_depthstencil_alignment(struct brw_context *brw,
- GLbitfield clear_mask);
-void brw_emit_hashing_mode(struct brw_context *brw, unsigned width,
- unsigned height, unsigned scale);
-
-/* brw_object_purgeable.c */
-void brw_init_object_purgeable_functions(struct dd_function_table *functions);
-
-/*======================================================================
- * brw_queryobj.c
- */
-void brw_init_common_queryobj_functions(struct dd_function_table *functions);
-void gfx4_init_queryobj_functions(struct dd_function_table *functions);
-void brw_emit_query_begin(struct brw_context *brw);
-void brw_emit_query_end(struct brw_context *brw);
-void brw_query_counter(struct gl_context *ctx, struct gl_query_object *q);
-bool brw_is_query_pipelined(struct brw_query_object *query);
-uint64_t brw_raw_timestamp_delta(struct brw_context *brw,
- uint64_t time0, uint64_t time1);
-
-/** gfx6_queryobj.c */
-void gfx6_init_queryobj_functions(struct dd_function_table *functions);
-void brw_write_timestamp(struct brw_context *brw, struct brw_bo *bo, int idx);
-void brw_write_depth_count(struct brw_context *brw, struct brw_bo *bo, int idx);
-
-/** hsw_queryobj.c */
-void hsw_overflow_result_to_gpr0(struct brw_context *brw,
- struct brw_query_object *query,
- int count);
-void hsw_init_queryobj_functions(struct dd_function_table *functions);
-
-/** brw_conditional_render.c */
-void brw_init_conditional_render_functions(struct dd_function_table *functions);
-bool brw_check_conditional_render(struct brw_context *brw);
-
-/** brw_batch.c */
-void brw_load_register_mem(struct brw_context *brw,
- uint32_t reg,
- struct brw_bo *bo,
- uint32_t offset);
-void brw_load_register_mem64(struct brw_context *brw,
- uint32_t reg,
- struct brw_bo *bo,
- uint32_t offset);
-void brw_store_register_mem32(struct brw_context *brw,
- struct brw_bo *bo, uint32_t reg, uint32_t offset);
-void brw_store_register_mem64(struct brw_context *brw,
- struct brw_bo *bo, uint32_t reg, uint32_t offset);
-void brw_load_register_imm32(struct brw_context *brw,
- uint32_t reg, uint32_t imm);
-void brw_load_register_imm64(struct brw_context *brw,
- uint32_t reg, uint64_t imm);
-void brw_load_register_reg(struct brw_context *brw, uint32_t dst,
- uint32_t src);
-void brw_load_register_reg64(struct brw_context *brw, uint32_t dst,
- uint32_t src);
-void brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo,
- uint32_t offset, uint32_t imm);
-void brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo,
- uint32_t offset, uint64_t imm);
-
-/*======================================================================
- * intel_tex_validate.c
- */
-void brw_validate_textures( struct brw_context *brw );
-
-
-/*======================================================================
- * brw_program.c
- */
-void brw_init_frag_prog_functions(struct dd_function_table *functions);
-
-void brw_get_scratch_bo(struct brw_context *brw,
- struct brw_bo **scratch_bo, int size);
-void brw_alloc_stage_scratch(struct brw_context *brw,
- struct brw_stage_state *stage_state,
- unsigned per_thread_size);
-void brw_init_shader_time(struct brw_context *brw);
-int brw_get_shader_time_index(struct brw_context *brw,
- struct gl_program *prog,
- enum shader_time_shader_type type,
- bool is_glsl_sh);
-void brw_collect_and_report_shader_time(struct brw_context *brw);
-void brw_destroy_shader_time(struct brw_context *brw);
-
-/* brw_urb.c
- */
-void brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
- unsigned vsize, unsigned sfsize);
-void brw_upload_urb_fence(struct brw_context *brw);
-
-/* brw_curbe.c
- */
-void brw_upload_cs_urb_state(struct brw_context *brw);
-
-/* brw_vs.c */
-gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
-
-/* brw_draw_upload.c */
-unsigned brw_get_vertex_surface_type(struct brw_context *brw,
- const struct gl_vertex_format *glformat);
-
-static inline unsigned
-brw_get_index_type(unsigned index_size)
-{
- /* The hw needs 0x00, 0x01, and 0x02 for ubyte, ushort, and uint,
- * respectively.
- */
- return index_size >> 1;
-}
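Worked out for the three legal index sizes (simple arithmetic, not part of the original patch):

/*   index_size = 1 (GL_UNSIGNED_BYTE)  -> 1 >> 1 = 0x00
 *   index_size = 2 (GL_UNSIGNED_SHORT) -> 2 >> 1 = 0x01
 *   index_size = 4 (GL_UNSIGNED_INT)   -> 4 >> 1 = 0x02
 */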
-
-void brw_prepare_vertices(struct brw_context *brw);
-
-/* brw_wm_surface_state.c */
-void brw_update_buffer_texture_surface(struct gl_context *ctx,
- unsigned unit,
- uint32_t *surf_offset);
-void
-brw_update_sol_surface(struct brw_context *brw,
- struct gl_buffer_object *buffer_obj,
- uint32_t *out_offset, unsigned num_vector_components,
- unsigned stride_dwords, unsigned offset_dwords);
-void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
- struct brw_stage_state *stage_state,
- struct brw_stage_prog_data *prog_data);
-void brw_upload_image_surfaces(struct brw_context *brw,
- const struct gl_program *prog,
- struct brw_stage_state *stage_state,
- struct brw_stage_prog_data *prog_data);
-
-/* brw_surface_formats.c */
-void brw_screen_init_surface_formats(struct brw_screen *screen);
-void brw_init_surface_formats(struct brw_context *brw);
-bool brw_render_target_supported(struct brw_context *brw,
- struct gl_renderbuffer *rb);
-uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
-
-/* brw_performance_query.c */
-void brw_init_performance_queries(struct brw_context *brw);
-
-/* intel_extensions.c */
-extern void brw_init_extensions(struct gl_context *ctx);
-
-/* intel_state.c */
-extern int brw_translate_shadow_compare_func(GLenum func);
-extern int brw_translate_compare_func(GLenum func);
-extern int brw_translate_stencil_op(GLenum op);
-
-/* brw_sync.c */
-void brw_init_syncobj_functions(struct dd_function_table *functions);
-
-/* gfx6_sol.c */
-struct gl_transform_feedback_object *
-brw_new_transform_feedback(struct gl_context *ctx, GLuint name);
-void
-brw_delete_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
- struct gl_transform_feedback_object *obj);
-void
-brw_end_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-brw_pause_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-brw_resume_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-brw_save_primitives_written_counters(struct brw_context *brw,
- struct brw_transform_feedback_object *obj);
-GLsizei
-brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj,
- GLuint stream);
-
-/* gfx7_sol_state.c */
-void
-gfx7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
- struct gl_transform_feedback_object *obj);
-void
-gfx7_end_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-gfx7_pause_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-gfx7_resume_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-
-/* hsw_sol.c */
-void
-hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
- struct gl_transform_feedback_object *obj);
-void
-hsw_end_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-hsw_pause_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-void
-hsw_resume_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj);
-
-/* brw_blorp_blit.cpp */
-GLbitfield
-brw_blorp_framebuffer(struct brw_context *brw,
- struct gl_framebuffer *readFb,
- struct gl_framebuffer *drawFb,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter);
-
-bool
-brw_blorp_copytexsubimage(struct brw_context *brw,
- struct gl_renderbuffer *src_rb,
- struct gl_texture_image *dst_image,
- int slice,
- int srcX0, int srcY0,
- int dstX0, int dstY0,
- int width, int height);
-
-/* brw_generate_mipmap.c */
-void brw_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *tex_obj);
-
-void
-gfx6_get_sample_position(struct gl_context *ctx,
- struct gl_framebuffer *fb,
- GLuint index,
- GLfloat *result);
-
-/* gfx8_multisample_state.c */
-void gfx8_emit_3dstate_sample_pattern(struct brw_context *brw);
-
-/* gfx7_l3_state.c */
-void brw_emit_l3_state(struct brw_context *brw);
-
-/* gfx7_urb.c */
-void
-gfx7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
- unsigned hs_size, unsigned ds_size,
- unsigned gs_size, unsigned fs_size);
-
-void
-gfx6_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, unsigned gs_size);
-void
-gfx7_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, bool tess_present);
-
-/* brw_reset.c */
-extern GLenum
-brw_get_graphics_reset_status(struct gl_context *ctx);
-void
-brw_check_for_reset(struct brw_context *brw);
-
-/* brw_compute.c */
-extern void
-brw_init_compute_functions(struct dd_function_table *functions);
-
-/* brw_program_binary.c */
-extern void
-brw_program_binary_init(unsigned device_id);
-extern void
-brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1);
-void brw_serialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog);
-extern void
-brw_deserialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *shProg,
- struct gl_program *prog);
-void
-brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog);
-void
-brw_program_deserialize_driver_blob(struct gl_context *ctx,
- struct gl_program *prog,
- gl_shader_stage stage);
-
-/*======================================================================
- * Inline conversion functions. These are better-typed than the
- * macros used previously:
- */
-static inline struct brw_context *
-brw_context( struct gl_context *ctx )
-{
- return (struct brw_context *)ctx;
-}
-
-static inline struct brw_program *
-brw_program(struct gl_program *p)
-{
- return (struct brw_program *) p;
-}
-
-static inline const struct brw_program *
-brw_program_const(const struct gl_program *p)
-{
- return (const struct brw_program *) p;
-}
-
-static inline bool
-brw_depth_writes_enabled(const struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* We consider depth writes disabled if the depth function is GL_EQUAL,
- * because it would just overwrite the existing depth value with itself.
- *
- * These bonus depth writes not only use bandwidth, but they also can
- * prevent early depth processing. For example, if the pixel shader
- * discards, the hardware must invoke the pixel shader to determine whether or not
- * to do the depth write. If writes are disabled, we may still be able
- * to do the depth test before the shader, and skip the shader execution.
- *
- * The Broadwell 3DSTATE_WM_DEPTH_STENCIL documentation also contains
- * a programming note saying to disable depth writes for EQUAL.
- */
- return ctx->Depth.Test && ctx->Depth.Mask && ctx->Depth.Func != GL_EQUAL;
-}
-
-void
-brw_emit_depthbuffer(struct brw_context *brw);
-
-uint32_t get_hw_prim_for_gl_prim(int mode);
-
-void
-gfx6_upload_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state);
-
-bool
-gfx9_use_linear_1d_layout(const struct brw_context *brw,
- const struct brw_mipmap_tree *mt);
-
-/* brw_queryformat.c */
-void brw_query_internal_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, GLenum pname,
- GLint *params);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2014 Intel Corporation All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jason Ekstrand <jason.ekstrand@intel.com>
- */
-
-#include "brw_blorp.h"
-#include "brw_fbo.h"
-#include "brw_tex.h"
-#include "brw_blit.h"
-#include "brw_mipmap_tree.h"
-#include "main/formats.h"
-#include "main/teximage.h"
-#include "drivers/common/meta.h"
-
-static void
-copy_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_x, int src_y, int src_z, unsigned src_level,
- struct brw_mipmap_tree *dst_mt,
- int dst_x, int dst_y, int dst_z, unsigned dst_level,
- int src_width, int src_height)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver <= 5) {
- /* On gfx4-5, try BLT first.
- *
- * Gfx4-5 have a single ring for both 3D and BLT operations, so there's
- * no inter-ring synchronization issues like on Gfx6+. It is apparently
- * faster than using the 3D pipeline. Original Gfx4 also has to rebase
- * and copy miptree slices in order to render to unaligned locations.
- */
- if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y,
- dst_mt, dst_level, dst_z, dst_x, dst_y,
- src_width, src_height))
- return;
- }
-
- brw_blorp_copy_miptrees(brw,
- src_mt, src_level, src_z,
- dst_mt, dst_level, dst_z,
- src_x, src_y, dst_x, dst_y,
- src_width, src_height);
-}
-
-static void
-brw_copy_image_sub_data(struct gl_context *ctx,
- struct gl_texture_image *src_image,
- struct gl_renderbuffer *src_renderbuffer,
- int src_x, int src_y, int src_z,
- struct gl_texture_image *dst_image,
- struct gl_renderbuffer *dst_renderbuffer,
- int dst_x, int dst_y, int dst_z,
- int src_width, int src_height)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_mipmap_tree *src_mt, *dst_mt;
- unsigned src_level, dst_level;
-
- if (src_image) {
- src_mt = brw_texture_image(src_image)->mt;
- src_level = src_image->Level + src_image->TexObject->Attrib.MinLevel;
-
- /* Cube maps actually have different images per face */
- if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
- src_z = src_image->Face;
-
- src_z += src_image->TexObject->Attrib.MinLayer;
- } else {
- assert(src_renderbuffer);
- src_mt = brw_renderbuffer(src_renderbuffer)->mt;
- src_image = src_renderbuffer->TexImage;
- src_level = 0;
- }
-
- if (dst_image) {
- dst_mt = brw_texture_image(dst_image)->mt;
-
- dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel;
-
- /* Cube maps actually have different images per face */
- if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
- dst_z = dst_image->Face;
-
- dst_z += dst_image->TexObject->Attrib.MinLayer;
- } else {
- assert(dst_renderbuffer);
- dst_mt = brw_renderbuffer(dst_renderbuffer)->mt;
- dst_image = dst_renderbuffer->TexImage;
- dst_level = 0;
- }
-
- copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level,
- dst_mt, dst_x, dst_y, dst_z, dst_level,
- src_width, src_height);
-
- /* CopyImage only works for equal formats, texture view equivalence
- * classes, and a couple special cases for compressed textures.
- *
- * Notably, GL_DEPTH_STENCIL does not appear in any equivalence
- * classes, so we know the formats must be the same, and thus both
- * will either have stencil, or not. They can't be mismatched.
- */
- assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL));
-
- if (dst_mt->stencil_mt) {
- copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level,
- dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level,
- src_width, src_height);
- }
-}
-
-void
-brw_init_copy_image_functions(struct dd_function_table *functions)
-{
- functions->CopyImageSubData = brw_copy_image_sub_data;
-}
+++ /dev/null
-/*
- * Copyright (c) 2014 - 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "util/ralloc.h"
-#include "brw_context.h"
-#include "brw_cs.h"
-#include "brw_wm.h"
-#include "brw_mipmap_tree.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/ir_uniform.h"
-
-static void
-assign_cs_binding_table_offsets(const struct intel_device_info *devinfo,
- const struct gl_program *prog,
- struct brw_cs_prog_data *prog_data)
-{
- uint32_t next_binding_table_offset = 0;
-
- /* May not be used if the gl_NumWorkGroups variable is not accessed. */
- prog_data->binding_table.work_groups_start = next_binding_table_offset;
- next_binding_table_offset++;
-
- brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
- next_binding_table_offset);
-}
-
-static bool
-brw_codegen_cs_prog(struct brw_context *brw,
- struct brw_program *cp,
- struct brw_cs_prog_key *key)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const GLuint *program;
- void *mem_ctx = ralloc_context(NULL);
- struct brw_cs_prog_data prog_data;
- bool start_busy = false;
- double start_time = 0;
- nir_shader *nir = nir_shader_clone(mem_ctx, cp->program.nir);
-
- memset(&prog_data, 0, sizeof(prog_data));
-
- if (cp->program.info.shared_size > 64 * 1024) {
- cp->program.sh.data->LinkStatus = LINKING_FAILURE;
- const char *error_str =
- "Compute shader used more than 64KB of shared variables";
- ralloc_strcat(&cp->program.sh.data->InfoLog, error_str);
- _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data);
-
- brw_nir_setup_glsl_uniforms(mem_ctx, nir,
- &cp->program, &prog_data.base, true);
-
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- brw_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
-
- brw_nir_lower_cs_intrinsics(nir);
-
- struct brw_compile_cs_params params = {
- .nir = nir,
- .key = key,
- .prog_data = &prog_data,
- .log_data = brw,
- };
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- params.shader_time = true;
- params.shader_time_index =
- brw_get_shader_time_index(brw, &cp->program, ST_CS, true);
- }
-
- program = brw_compile_cs(brw->screen->compiler, mem_ctx, &params);
- if (program == NULL) {
- cp->program.sh.data->LinkStatus = LINKING_FAILURE;
- ralloc_strcat(&cp->program.sh.data->InfoLog, params.error_str);
- _mesa_problem(NULL, "Failed to compile compute shader: %s\n", params.error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- if (unlikely(brw->perf_debug)) {
- if (cp->compiled_once) {
- brw_debug_recompile(brw, MESA_SHADER_COMPUTE, cp->program.Id,
- &key->base);
- }
- cp->compiled_once = true;
-
- if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
- perf_debug("CS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- }
-
- brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch);
-
- /* The param and pull_param arrays will be freed by the shader cache. */
- ralloc_steal(NULL, prog_data.base.param);
- ralloc_steal(NULL, prog_data.base.pull_param);
- brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
- key, sizeof(*key),
- program, prog_data.base.program_size,
- &prog_data, sizeof(prog_data),
- &brw->cs.base.prog_offset, &brw->cs.base.prog_data);
- ralloc_free(mem_ctx);
-
- return true;
-}
-
-
-void
-brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_COMPUTE_PROGRAM */
- const struct brw_program *cp =
- (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
-
- memset(key, 0, sizeof(*key));
-
- /* _NEW_TEXTURE */
- brw_populate_base_prog_key(ctx, cp, &key->base);
-}
-
-
-void
-brw_upload_cs_prog(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_cs_prog_key key;
- struct brw_program *cp =
- (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
-
- if (!cp)
- return;
-
- if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
- return;
-
- brw->cs.base.sampler_count =
- util_last_bit(ctx->ComputeProgram._Current->SamplersUsed);
-
- brw_cs_populate_key(brw, &key);
-
- if (brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, &key, sizeof(key),
- &brw->cs.base.prog_offset, &brw->cs.base.prog_data,
- true))
- return;
-
- if (brw_disk_cache_upload_program(brw, MESA_SHADER_COMPUTE))
- return;
-
- cp = (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
- cp->id = key.base.program_string_id;
-
- ASSERTED bool success = brw_codegen_cs_prog(brw, cp, &key);
- assert(success);
-}
-
-void
-brw_cs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_cs_prog_key *key,
- struct gl_program *prog)
-{
- const struct intel_device_info *devinfo = compiler->devinfo;
- memset(key, 0, sizeof(*key));
- brw_populate_default_base_prog_key(devinfo, brw_program(prog), &key->base);
-}
-
-bool
-brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_cs_prog_key key;
-
- struct brw_program *bcp = brw_program(prog);
-
- brw_cs_populate_default_key(brw->screen->compiler, &key, prog);
-
- uint32_t old_prog_offset = brw->cs.base.prog_offset;
- struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data;
-
- bool success = brw_codegen_cs_prog(brw, bcp, &key);
-
- brw->cs.base.prog_offset = old_prog_offset;
- brw->cs.base.prog_data = old_prog_data;
-
- return success;
-}
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef BRW_CS_H
-#define BRW_CS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void
-brw_upload_cs_prog(struct brw_context *brw);
-
-void
-brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key);
-void
-brw_cs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_cs_prog_key *key,
- struct gl_program *prog);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BRW_CS_H */
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-/** @file brw_curbe.c
- *
- * Push constant handling for gfx4/5.
- *
- * Push constants are constant values (such as GLSL uniforms) that are
- * pre-loaded into a shader stage's register space at thread spawn time. On
- * gfx4 and gfx5, we create a blob in memory containing all the push constants
- * for all the stages in order. At CMD_CONST_BUFFER time that blob is loaded
- * into URB space as a constant URB entry (CURBE) so that it can be accessed
- * quickly at thread setup time. Each individual fixed function unit's state
- * (brw_vs_state.c for example) tells the hardware which subset of the CURBE
- * it wants in its register space, and we calculate those areas here under the
- * BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag. The brw_urb.c allocation will control
- * how many CURBEs can be loaded into the hardware at once before a pipeline
- * stall occurs at CMD_CONST_BUFFER time.
- *
- * On gfx6+, constant handling becomes a much simpler set of per-unit state.
- * See gfx6_upload_vec4_push_constants() in gfx6_vs_state.c for that code.
- */
-
-
-#include "compiler/nir/nir.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_statevars.h"
-#include "util/bitscan.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_util.h"
-#include "util/u_math.h"
-
-
-/**
- * Partition the CURBE between the various users of constant values.
- *
- * If the users all fit within the previous allocation, we avoid changing
- * the layout because that means reuploading all unit state and uploading new
- * constant buffers.
- */
-static void calculate_curbe_offsets( struct brw_context *brw )
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FS_PROG_DATA */
- const GLuint nr_fp_regs = (brw->wm.base.prog_data->nr_params + 15) / 16;
-
- /* BRW_NEW_VS_PROG_DATA */
- const GLuint nr_vp_regs = (brw->vs.base.prog_data->nr_params + 15) / 16;
- GLuint nr_clip_regs = 0;
- GLuint total_regs;
-
- /* _NEW_TRANSFORM */
- if (ctx->Transform.ClipPlanesEnabled) {
- GLuint nr_planes = 6 + util_bitcount(ctx->Transform.ClipPlanesEnabled);
- nr_clip_regs = (nr_planes * 4 + 15) / 16;
- }
-
-
- total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
-
- /* The CURBE allocation size is limited to 32 512-bit units (128 EU
- * registers, or 1024 floats). See CS_URB_STATE in the gfx4 or gfx5
- * (volume 1, part 1) PRMs.
- *
- * Note that in brw_fs.cpp we're only loading up to 16 EU registers of
- * values as push constants before spilling to pull constants, and in
- * brw_vec4.cpp we're loading up to 32 registers of push constants. An EU
- * register is 1/2 of one of these URB entry units, so that leaves us 16 EU
- * regs for clip.
- */
- assert(total_regs <= 32);
-
- /* Lazy resize:
- */
- if (nr_fp_regs > brw->curbe.wm_size ||
- nr_vp_regs > brw->curbe.vs_size ||
- nr_clip_regs != brw->curbe.clip_size ||
- (total_regs < brw->curbe.total_size / 4 &&
- brw->curbe.total_size > 16)) {
-
- GLuint reg = 0;
-
- /* Calculate a new layout:
- */
- reg = 0;
- brw->curbe.wm_start = reg;
- brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
- brw->curbe.clip_start = reg;
- brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
- brw->curbe.vs_start = reg;
- brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
- brw->curbe.total_size = reg;
-
- if (0)
- fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
- brw->curbe.wm_start,
- brw->curbe.wm_size,
- brw->curbe.clip_start,
- brw->curbe.clip_size,
- brw->curbe.vs_start,
- brw->curbe.vs_size );
-
- brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
- }
-}
-
-
-const struct brw_tracked_state brw_curbe_offsets = {
- .dirty = {
- .mesa = _NEW_TRANSFORM,
- .brw = BRW_NEW_CONTEXT |
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = calculate_curbe_offsets
-};
-
-
-
-
-/** Uploads the CS_URB_STATE packet.
- *
- * Just like brw_vs_state.c and brw_wm_state.c define a URB entry size and
- * number of entries for their stages, constant buffers do so using this state
- * packet. Having multiple CURBEs in the URB at the same time allows the
- * hardware to avoid a pipeline stall between primitives using different
- * constant buffer contents.
- */
-void brw_upload_cs_urb_state(struct brw_context *brw)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
-
- /* BRW_NEW_URB_FENCE */
- if (brw->urb.csize == 0) {
- OUT_BATCH(0);
- } else {
- /* BRW_NEW_URB_FENCE */
- assert(brw->urb.nr_cs_entries);
- OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
- }
- ADVANCE_BATCH();
-}
-
-static const GLfloat fixed_plane[6][4] = {
- { 0, 0, -1, 1 },
- { 0, 0, 1, 1 },
- { 0, -1, 0, 1 },
- { 0, 1, 0, 1 },
- {-1, 0, 0, 1 },
- { 1, 0, 0, 1 }
-};
-
-/**
- * Gathers together all the uniform values into a block of memory to be
- * uploaded into the CURBE, then emits the state packet telling the hardware
- * the new location.
- */
-static void
-brw_upload_constant_buffer(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- const GLuint sz = brw->curbe.total_size;
- const GLuint bufsz = sz * 16 * sizeof(GLfloat);
- gl_constant_value *buf;
- GLuint i;
- gl_clip_plane *clip_planes;
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
-
- /* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
-
- if (sz == 0) {
- goto emit;
- }
-
- buf = brw_upload_space(&brw->upload, bufsz, 64,
- &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- /* fragment shader constants */
- if (brw->curbe.wm_size) {
- _mesa_load_state_parameters(ctx, fp->Parameters);
-
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- GLuint offset = brw->curbe.wm_start * 16;
-
- /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
- brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
- brw->wm.base.prog_data->param,
- brw->wm.base.prog_data->nr_params);
- }
-
- /* clipper constants */
- if (brw->curbe.clip_size) {
- GLuint offset = brw->curbe.clip_start * 16;
- GLbitfield mask;
-
- /* If any planes are going this way, send them all this way:
- */
- for (i = 0; i < 6; i++) {
- buf[offset + i * 4 + 0].f = fixed_plane[i][0];
- buf[offset + i * 4 + 1].f = fixed_plane[i][1];
- buf[offset + i * 4 + 2].f = fixed_plane[i][2];
- buf[offset + i * 4 + 3].f = fixed_plane[i][3];
- }
-
- /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
- * clip-space:
- */
- clip_planes = brw_select_clip_planes(ctx);
- mask = ctx->Transform.ClipPlanesEnabled;
- while (mask) {
- const int j = u_bit_scan(&mask);
- buf[offset + i * 4 + 0].f = clip_planes[j][0];
- buf[offset + i * 4 + 1].f = clip_planes[j][1];
- buf[offset + i * 4 + 2].f = clip_planes[j][2];
- buf[offset + i * 4 + 3].f = clip_planes[j][3];
- i++;
- }
- }
-
- /* vertex shader constants */
- if (brw->curbe.vs_size) {
- _mesa_load_state_parameters(ctx, vp->Parameters);
-
- GLuint offset = brw->curbe.vs_start * 16;
-
- /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
- brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
- brw->vs.base.prog_data->param,
- brw->vs.base.prog_data->nr_params);
- }
-
- if (0) {
- for (i = 0; i < sz*16; i+=4)
- fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
- buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
- }
-
- /* Because this provokes an action (ie copy the constants into the
- * URB), it shouldn't be shortcircuited if identical to the
- * previous time - because eg. the urb destination may have
- * changed, or the urb contents different to last time.
- *
- * Note that the data referred to is actually copied internally,
- * not just used in place according to passed pointer.
- *
- * It appears that the CS unit takes care of using each available
- * URB entry (Const URB Entry == CURBE) in turn, and issuing
- * flushes as necessary when doublebuffering of CURBEs isn't
- * possible.
- */
-
-emit:
- /* BRW_NEW_URB_FENCE: From the gfx4 PRM, volume 1, section 3.9.8
- * (CONSTANT_BUFFER (CURBE Load)):
- *
- * "Modifying the CS URB allocation via URB_FENCE invalidates any
- * previous CURBE entries. Therefore software must subsequently
- * [re]issue a CONSTANT_BUFFER command before CURBE data can be used
- * in the pipeline."
- */
- BEGIN_BATCH(2);
- if (brw->curbe.total_size == 0) {
- OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
- OUT_BATCH(0);
- } else {
- OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
- OUT_RELOC(brw->curbe.curbe_bo, 0,
- (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
- }
- ADVANCE_BATCH();
-
- /* Work around a Broadwater/Crestline depth interpolator bug. The
- * following sequence will cause GPU hangs:
- *
- * 1. Change state so that all depth related fields in CC_STATE are
- * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
- * 2. Emit a CONSTANT_BUFFER packet.
- * 3. Draw via 3DPRIMITIVE.
- *
- * The recommended workaround is to emit a non-pipelined state change after
- * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
- *
- * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
- * and always emit it when "PS Use Source Depth" is set. We could be more
- * precise, but the additional complexity is probably not worth it.
- *
- * BRW_NEW_FRAGMENT_PROGRAM
- */
- if (devinfo->verx10 == 40 &&
- BITSET_TEST(fp->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-}
-
-const struct brw_tracked_state brw_constant_buffer = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
- BRW_NEW_URB_FENCE |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_constant_buffer,
-};
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-#ifndef BRW_DEFINES_H
-#define BRW_DEFINES_H
-
-#include "util/macros.h"
-
-#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low))
-/* Using the GNU statement expression extension */
-#define SET_FIELD(value, field) \
- ({ \
- uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \
- assert((fieldval & ~ field ## _MASK) == 0); \
- fieldval & field ## _MASK; \
- })
-
-#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low))
-#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
-
-/**
- * For use with masked MMIO registers where the upper 16 bits control which
- * of the lower bits are committed to the register.
- */
-#define REG_MASK(value) ((value) << 16)
-
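/* Illustrative aside (not part of the deleted file): a minimal sketch of how
 * the field helpers above are typically combined to pack and unpack a state
 * DWord. It reuses the BRW_SURFACE_WIDTH/HEIGHT shift+mask pairs defined
 * further down in this header; the helper names themselves are made up.
 */
static inline uint32_t
example_pack_surface_size(uint32_t width, uint32_t height)
{
   /* SET_FIELD shifts each value into place and asserts it fits the mask. */
   return SET_FIELD(width - 1, BRW_SURFACE_WIDTH) |
          SET_FIELD(height - 1, BRW_SURFACE_HEIGHT);
}

static inline uint32_t
example_unpack_surface_width(uint32_t dw2)
{
   /* GET_FIELD masks and shifts the packed value back out. */
   return GET_FIELD(dw2, BRW_SURFACE_WIDTH) + 1;
}

/* For masked MMIO registers, REG_MASK selects which low bits a write commits,
 * e.g. REG_MASK(1 << 0) | (1 << 0) sets bit 0 and leaves the others untouched.
 */
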
-/* 3D state:
- */
-#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */
-/* DW0 */
-# define GFX4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10
-# define GFX4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
-# define GFX4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
-# define GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10)
-# define GFX7_3DPRIM_PREDICATE_ENABLE (1 << 8)
-/* DW1 */
-# define GFX7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
-# define GFX7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
-
-#define BRW_ANISORATIO_2 0
-#define BRW_ANISORATIO_4 1
-#define BRW_ANISORATIO_6 2
-#define BRW_ANISORATIO_8 3
-#define BRW_ANISORATIO_10 4
-#define BRW_ANISORATIO_12 5
-#define BRW_ANISORATIO_14 6
-#define BRW_ANISORATIO_16 7
-
-#define BRW_BLENDFACTOR_ONE 0x1
-#define BRW_BLENDFACTOR_SRC_COLOR 0x2
-#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
-#define BRW_BLENDFACTOR_DST_ALPHA 0x4
-#define BRW_BLENDFACTOR_DST_COLOR 0x5
-#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
-#define BRW_BLENDFACTOR_CONST_COLOR 0x7
-#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
-#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
-#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
-#define BRW_BLENDFACTOR_ZERO 0x11
-#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
-#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
-#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
-#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
-#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
-#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
-#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
-#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
-
-#define BRW_BLENDFUNCTION_ADD 0
-#define BRW_BLENDFUNCTION_SUBTRACT 1
-#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
-#define BRW_BLENDFUNCTION_MIN 3
-#define BRW_BLENDFUNCTION_MAX 4
-
-#define BRW_ALPHATEST_FORMAT_UNORM8 0
-#define BRW_ALPHATEST_FORMAT_FLOAT32 1
-
-#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
-#define BRW_CHROMAKEY_REPLACE_BLACK 1
-
-#define BRW_CLIP_API_OGL 0
-#define BRW_CLIP_API_DX 1
-
-#define BRW_CLIP_NDCSPACE 0
-#define BRW_CLIP_SCREENSPACE 1
-
-#define BRW_COMPAREFUNCTION_ALWAYS 0
-#define BRW_COMPAREFUNCTION_NEVER 1
-#define BRW_COMPAREFUNCTION_LESS 2
-#define BRW_COMPAREFUNCTION_EQUAL 3
-#define BRW_COMPAREFUNCTION_LEQUAL 4
-#define BRW_COMPAREFUNCTION_GREATER 5
-#define BRW_COMPAREFUNCTION_NOTEQUAL 6
-#define BRW_COMPAREFUNCTION_GEQUAL 7
-
-#define BRW_COVERAGE_PIXELS_HALF 0
-#define BRW_COVERAGE_PIXELS_1 1
-#define BRW_COVERAGE_PIXELS_2 2
-#define BRW_COVERAGE_PIXELS_4 3
-
-#define BRW_CULLMODE_BOTH 0
-#define BRW_CULLMODE_NONE 1
-#define BRW_CULLMODE_FRONT 2
-#define BRW_CULLMODE_BACK 3
-
-#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
-#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
-
-#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
-#define BRW_DEPTHFORMAT_D32_FLOAT 1
-#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
-#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GFX5 */
-#define BRW_DEPTHFORMAT_D16_UNORM 5
-
-#define BRW_FLOATING_POINT_IEEE_754 0
-#define BRW_FLOATING_POINT_NON_IEEE_754 1
-
-#define BRW_FRONTWINDING_CW 0
-#define BRW_FRONTWINDING_CCW 1
-
-#define BRW_CUT_INDEX_ENABLE (1 << 10)
-
-#define BRW_INDEX_BYTE 0
-#define BRW_INDEX_WORD 1
-#define BRW_INDEX_DWORD 2
-
-#define BRW_LOGICOPFUNCTION_CLEAR 0
-#define BRW_LOGICOPFUNCTION_NOR 1
-#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
-#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
-#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
-#define BRW_LOGICOPFUNCTION_INVERT 5
-#define BRW_LOGICOPFUNCTION_XOR 6
-#define BRW_LOGICOPFUNCTION_NAND 7
-#define BRW_LOGICOPFUNCTION_AND 8
-#define BRW_LOGICOPFUNCTION_EQUIV 9
-#define BRW_LOGICOPFUNCTION_NOOP 10
-#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
-#define BRW_LOGICOPFUNCTION_COPY 12
-#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
-#define BRW_LOGICOPFUNCTION_OR 14
-#define BRW_LOGICOPFUNCTION_SET 15
-
-#define BRW_MAPFILTER_NEAREST 0x0
-#define BRW_MAPFILTER_LINEAR 0x1
-#define BRW_MAPFILTER_ANISOTROPIC 0x2
-
-#define BRW_MIPFILTER_NONE 0
-#define BRW_MIPFILTER_NEAREST 1
-#define BRW_MIPFILTER_LINEAR 3
-
-#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
-#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
-#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
-#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
-#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
-#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
-
-#define BRW_PREFILTER_ALWAYS 0x0
-#define BRW_PREFILTER_NEVER 0x1
-#define BRW_PREFILTER_LESS 0x2
-#define BRW_PREFILTER_EQUAL 0x3
-#define BRW_PREFILTER_LEQUAL 0x4
-#define BRW_PREFILTER_GREATER 0x5
-#define BRW_PREFILTER_NOTEQUAL 0x6
-#define BRW_PREFILTER_GEQUAL 0x7
-
-#define BRW_PROVOKING_VERTEX_0 0
-#define BRW_PROVOKING_VERTEX_1 1
-#define BRW_PROVOKING_VERTEX_2 2
-
-#define BRW_RASTRULE_UPPER_LEFT 0
-#define BRW_RASTRULE_UPPER_RIGHT 1
-/* These are listed as "Reserved, but not seen as useful"
- * in Intel documentation (page 212, "Point Rasterization Rule",
- * section 7.4 "SF Pipeline State Summary", of document
- * "Intel® 965 Express Chipset Family and Intel® G35 Express
- * Chipset Graphics Controller Programmer's Reference Manual,
- * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
- * available at
- * https://01.org/linuxgraphics/documentation/hardware-specification-prms
- * at the time of this writing).
- *
- * These appear to be supported on at least some
- * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
- * is useful when using OpenGL to render to a FBO
- * (which has the pixel coordinate Y orientation inverted
- * with respect to the normal OpenGL pixel coordinate system).
- */
-#define BRW_RASTRULE_LOWER_LEFT 2
-#define BRW_RASTRULE_LOWER_RIGHT 3
-
-#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
-#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
-#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
-
-#define BRW_STENCILOP_KEEP 0
-#define BRW_STENCILOP_ZERO 1
-#define BRW_STENCILOP_REPLACE 2
-#define BRW_STENCILOP_INCRSAT 3
-#define BRW_STENCILOP_DECRSAT 4
-#define BRW_STENCILOP_INCR 5
-#define BRW_STENCILOP_DECR 6
-#define BRW_STENCILOP_INVERT 7
-
-/* Surface state DW0 */
-#define GFX8_SURFACE_IS_ARRAY (1 << 28)
-#define GFX8_SURFACE_VALIGN_4 (1 << 16)
-#define GFX8_SURFACE_VALIGN_8 (2 << 16)
-#define GFX8_SURFACE_VALIGN_16 (3 << 16)
-#define GFX8_SURFACE_HALIGN_4 (1 << 14)
-#define GFX8_SURFACE_HALIGN_8 (2 << 14)
-#define GFX8_SURFACE_HALIGN_16 (3 << 14)
-#define GFX8_SURFACE_TILING_NONE (0 << 12)
-#define GFX8_SURFACE_TILING_W (1 << 12)
-#define GFX8_SURFACE_TILING_X (2 << 12)
-#define GFX8_SURFACE_TILING_Y (3 << 12)
-#define GFX8_SURFACE_SAMPLER_L2_BYPASS_DISABLE (1 << 9)
-#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
-#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
-#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
-#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
-#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f
-#define BRW_SURFACE_BLEND_ENABLED (1 << 13)
-#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14
-#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15
-#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16
-#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17
-
-#define GFX9_SURFACE_ASTC_HDR_FORMAT_BIT 0x100
-
-#define BRW_SURFACE_FORMAT_SHIFT 18
-#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
-
-#define BRW_SURFACERETURNFORMAT_FLOAT32 0
-#define BRW_SURFACERETURNFORMAT_S1 1
-
-#define BRW_SURFACE_TYPE_SHIFT 29
-#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29)
-#define BRW_SURFACE_1D 0
-#define BRW_SURFACE_2D 1
-#define BRW_SURFACE_3D 2
-#define BRW_SURFACE_CUBE 3
-#define BRW_SURFACE_BUFFER 4
-#define BRW_SURFACE_NULL 7
-
-#define GFX7_SURFACE_IS_ARRAY (1 << 28)
-#define GFX7_SURFACE_VALIGN_2 (0 << 16)
-#define GFX7_SURFACE_VALIGN_4 (1 << 16)
-#define GFX7_SURFACE_HALIGN_4 (0 << 15)
-#define GFX7_SURFACE_HALIGN_8 (1 << 15)
-#define GFX7_SURFACE_TILING_NONE (0 << 13)
-#define GFX7_SURFACE_TILING_X (2 << 13)
-#define GFX7_SURFACE_TILING_Y (3 << 13)
-#define GFX7_SURFACE_ARYSPC_FULL (0 << 10)
-#define GFX7_SURFACE_ARYSPC_LOD0 (1 << 10)
-
-/* Surface state DW2 */
-#define BRW_SURFACE_HEIGHT_SHIFT 19
-#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19)
-#define BRW_SURFACE_WIDTH_SHIFT 6
-#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6)
-#define BRW_SURFACE_LOD_SHIFT 2
-#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2)
-#define GFX7_SURFACE_HEIGHT_SHIFT 16
-#define GFX7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16)
-#define GFX7_SURFACE_WIDTH_SHIFT 0
-#define GFX7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0)
-
-/* Surface state DW3 */
-#define BRW_SURFACE_DEPTH_SHIFT 21
-#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21)
-#define BRW_SURFACE_PITCH_SHIFT 3
-#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3)
-#define BRW_SURFACE_TILED (1 << 1)
-#define BRW_SURFACE_TILED_Y (1 << 0)
-#define HSW_SURFACE_IS_INTEGER_FORMAT (1 << 18)
-
-/* Surface state DW4 */
-#define BRW_SURFACE_MIN_LOD_SHIFT 28
-#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
-#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 17
-#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(27, 17)
-#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 8
-#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(16, 8)
-#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4)
-#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4)
-#define GFX7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3)
-#define GFX8_SURFACE_MULTISAMPLECOUNT_2 (1 << 3)
-#define GFX7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3)
-#define GFX7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3)
-#define GFX8_SURFACE_MULTISAMPLECOUNT_16 (4 << 3)
-#define GFX7_SURFACE_MSFMT_MSS (0 << 6)
-#define GFX7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6)
-#define GFX7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18
-#define GFX7_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(28, 18)
-#define GFX7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 7
-#define GFX7_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(17, 7)
-
-/* Surface state DW5 */
-#define BRW_SURFACE_X_OFFSET_SHIFT 25
-#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25)
-#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24)
-#define BRW_SURFACE_Y_OFFSET_SHIFT 20
-#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20)
-#define GFX7_SURFACE_MIN_LOD_SHIFT 4
-#define GFX7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4)
-#define GFX8_SURFACE_Y_OFFSET_SHIFT 21
-#define GFX8_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 21)
-
-#define GFX9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8
-#define GFX9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8)
-
-/* Surface state DW6 */
-#define GFX7_SURFACE_MCS_ENABLE (1 << 0)
-#define GFX7_SURFACE_MCS_PITCH_SHIFT 3
-#define GFX7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
-#define GFX8_SURFACE_AUX_QPITCH_SHIFT 16
-#define GFX8_SURFACE_AUX_QPITCH_MASK INTEL_MASK(30, 16)
-#define GFX8_SURFACE_AUX_PITCH_SHIFT 3
-#define GFX8_SURFACE_AUX_PITCH_MASK INTEL_MASK(11, 3)
-#define GFX8_SURFACE_AUX_MODE_MASK INTEL_MASK(2, 0)
-
-#define GFX8_SURFACE_AUX_MODE_NONE 0
-#define GFX8_SURFACE_AUX_MODE_MCS 1
-#define GFX8_SURFACE_AUX_MODE_APPEND 2
-#define GFX8_SURFACE_AUX_MODE_HIZ 3
-#define GFX9_SURFACE_AUX_MODE_CCS_E 5
-
-/* Surface state DW7 */
-#define GFX9_SURFACE_RT_COMPRESSION_SHIFT 30
-#define GFX9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30)
-#define GFX7_SURFACE_CLEAR_COLOR_SHIFT 28
-#define GFX7_SURFACE_SCS_R_SHIFT 25
-#define GFX7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
-#define GFX7_SURFACE_SCS_G_SHIFT 22
-#define GFX7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22)
-#define GFX7_SURFACE_SCS_B_SHIFT 19
-#define GFX7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19)
-#define GFX7_SURFACE_SCS_A_SHIFT 16
-#define GFX7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16)
-
-/* The actual swizzle values/what channel to use */
-#define HSW_SCS_ZERO 0
-#define HSW_SCS_ONE 1
-#define HSW_SCS_RED 4
-#define HSW_SCS_GREEN 5
-#define HSW_SCS_BLUE 6
-#define HSW_SCS_ALPHA 7
-
-/* SAMPLER_STATE DW0 */
-#define BRW_SAMPLER_DISABLE (1 << 31)
-#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE (1 << 28)
-#define GFX6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gfx6 only */
-#define BRW_SAMPLER_BASE_MIPLEVEL_MASK INTEL_MASK(26, 22)
-#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT 22
-#define BRW_SAMPLER_MIP_FILTER_MASK INTEL_MASK(21, 20)
-#define BRW_SAMPLER_MIP_FILTER_SHIFT 20
-#define BRW_SAMPLER_MAG_FILTER_MASK INTEL_MASK(19, 17)
-#define BRW_SAMPLER_MAG_FILTER_SHIFT 17
-#define BRW_SAMPLER_MIN_FILTER_MASK INTEL_MASK(16, 14)
-#define BRW_SAMPLER_MIN_FILTER_SHIFT 14
-#define GFX4_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 3)
-#define GFX4_SAMPLER_LOD_BIAS_SHIFT 3
-#define GFX4_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(2, 0)
-#define GFX4_SAMPLER_SHADOW_FUNCTION_SHIFT 0
-
-#define GFX7_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 1)
-#define GFX7_SAMPLER_LOD_BIAS_SHIFT 1
-#define GFX7_SAMPLER_EWA_ANISOTROPIC_ALGORITHM (1 << 0)
-
-/* SAMPLER_STATE DW1 */
-#define GFX4_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 22)
-#define GFX4_SAMPLER_MIN_LOD_SHIFT 22
-#define GFX4_SAMPLER_MAX_LOD_MASK INTEL_MASK(21, 12)
-#define GFX4_SAMPLER_MAX_LOD_SHIFT 12
-#define GFX4_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 9)
-/* Wrap modes are in DW1 on Gfx4-6 and DW3 on Gfx7+ */
-#define BRW_SAMPLER_TCX_WRAP_MODE_MASK INTEL_MASK(8, 6)
-#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT 6
-#define BRW_SAMPLER_TCY_WRAP_MODE_MASK INTEL_MASK(5, 3)
-#define BRW_SAMPLER_TCY_WRAP_MODE_SHIFT 3
-#define BRW_SAMPLER_TCZ_WRAP_MODE_MASK INTEL_MASK(2, 0)
-#define BRW_SAMPLER_TCZ_WRAP_MODE_SHIFT 0
-
-#define GFX7_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 20)
-#define GFX7_SAMPLER_MIN_LOD_SHIFT 20
-#define GFX7_SAMPLER_MAX_LOD_MASK INTEL_MASK(19, 8)
-#define GFX7_SAMPLER_MAX_LOD_SHIFT 8
-#define GFX7_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(3, 1)
-#define GFX7_SAMPLER_SHADOW_FUNCTION_SHIFT 1
-#define GFX7_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 0)
-
-/* SAMPLER_STATE DW2 - border color pointer */
-
-/* SAMPLER_STATE DW3 */
-#define BRW_SAMPLER_MAX_ANISOTROPY_MASK INTEL_MASK(21, 19)
-#define BRW_SAMPLER_MAX_ANISOTROPY_SHIFT 19
-#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK INTEL_MASK(18, 13)
-#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT 13
-#define GFX7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10)
-/* Gfx7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */
-#define GFX6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0)
-
-enum brw_wrap_mode {
- BRW_TEXCOORDMODE_WRAP = 0,
- BRW_TEXCOORDMODE_MIRROR = 1,
- BRW_TEXCOORDMODE_CLAMP = 2,
- BRW_TEXCOORDMODE_CUBE = 3,
- BRW_TEXCOORDMODE_CLAMP_BORDER = 4,
- BRW_TEXCOORDMODE_MIRROR_ONCE = 5,
- GFX8_TEXCOORDMODE_HALF_BORDER = 6,
-};
-
-#define BRW_THREAD_PRIORITY_NORMAL 0
-#define BRW_THREAD_PRIORITY_HIGH 1
-
-#define BRW_TILEWALK_XMAJOR 0
-#define BRW_TILEWALK_YMAJOR 1
-
-#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
-#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
-
-
-#define CMD_URB_FENCE 0x6000
-#define CMD_CS_URB_STATE 0x6001
-#define CMD_CONST_BUFFER 0x6002
-
-#define CMD_STATE_BASE_ADDRESS 0x6101
-#define CMD_STATE_SIP 0x6102
-#define CMD_PIPELINE_SELECT_965 0x6104
-#define CMD_PIPELINE_SELECT_GM45 0x6904
-
-#define _3DSTATE_PIPELINED_POINTERS 0x7800
-#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801
-# define GFX6_BINDING_TABLE_MODIFY_VS (1 << 8)
-# define GFX6_BINDING_TABLE_MODIFY_GS (1 << 9)
-# define GFX6_BINDING_TABLE_MODIFY_PS (1 << 12)
-
-#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GFX7+ */
-#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GFX7+ */
-#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GFX7+ */
-#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GFX7+ */
-#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GFX7+ */
-
-#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GFX6+ */
-# define PS_SAMPLER_STATE_CHANGE (1 << 12)
-# define GS_SAMPLER_STATE_CHANGE (1 << 9)
-# define VS_SAMPLER_STATE_CHANGE (1 << 8)
-/* DW1: VS */
-/* DW2: GS */
-/* DW3: PS */
-
-#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GFX7+ */
-#define _3DSTATE_SAMPLER_STATE_POINTERS_HS 0x782C /* GFX7+ */
-#define _3DSTATE_SAMPLER_STATE_POINTERS_DS 0x782D /* GFX7+ */
-#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GFX7+ */
-#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GFX7+ */
-
-#define _3DSTATE_VERTEX_BUFFERS 0x7808
-# define BRW_VB0_INDEX_SHIFT 27
-# define GFX6_VB0_INDEX_SHIFT 26
-# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
-# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
-# define GFX6_VB0_ACCESS_VERTEXDATA (0 << 20)
-# define GFX6_VB0_ACCESS_INSTANCEDATA (1 << 20)
-# define GFX7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
-# define BRW_VB0_PITCH_SHIFT 0
-
-#define _3DSTATE_VERTEX_ELEMENTS 0x7809
-# define BRW_VE0_INDEX_SHIFT 27
-# define GFX6_VE0_INDEX_SHIFT 26
-# define BRW_VE0_FORMAT_SHIFT 16
-# define BRW_VE0_VALID (1 << 26)
-# define GFX6_VE0_VALID (1 << 25)
-# define GFX6_VE0_EDGE_FLAG_ENABLE (1 << 15)
-# define BRW_VE0_SRC_OFFSET_SHIFT 0
-# define BRW_VE1_COMPONENT_NOSTORE 0
-# define BRW_VE1_COMPONENT_STORE_SRC 1
-# define BRW_VE1_COMPONENT_STORE_0 2
-# define BRW_VE1_COMPONENT_STORE_1_FLT 3
-# define BRW_VE1_COMPONENT_STORE_1_INT 4
-# define BRW_VE1_COMPONENT_STORE_VID 5
-# define BRW_VE1_COMPONENT_STORE_IID 6
-# define BRW_VE1_COMPONENT_STORE_PID 7
-# define BRW_VE1_COMPONENT_0_SHIFT 28
-# define BRW_VE1_COMPONENT_1_SHIFT 24
-# define BRW_VE1_COMPONENT_2_SHIFT 20
-# define BRW_VE1_COMPONENT_3_SHIFT 16
-# define BRW_VE1_DST_OFFSET_SHIFT 0
-
-#define CMD_INDEX_BUFFER 0x780a
-#define GFX4_3DSTATE_VF_STATISTICS 0x780b
-#define GM45_3DSTATE_VF_STATISTICS 0x680b
-#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GFX6+ */
-#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GFX7+ */
-#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GFX7+ */
-
-#define _3DSTATE_URB 0x7805 /* GFX6 */
-# define GFX6_URB_VS_SIZE_SHIFT 16
-# define GFX6_URB_VS_ENTRIES_SHIFT 0
-# define GFX6_URB_GS_ENTRIES_SHIFT 8
-# define GFX6_URB_GS_SIZE_SHIFT 0
-
-#define _3DSTATE_VF 0x780c /* GFX7.5+ */
-#define HSW_CUT_INDEX_ENABLE (1 << 8)
-
-#define _3DSTATE_VF_INSTANCING 0x7849 /* GFX8+ */
-# define GFX8_VF_INSTANCING_ENABLE (1 << 8)
-
-#define _3DSTATE_VF_SGVS 0x784a /* GFX8+ */
-# define GFX8_SGVS_ENABLE_INSTANCE_ID (1 << 31)
-# define GFX8_SGVS_INSTANCE_ID_COMPONENT_SHIFT 29
-# define GFX8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16
-# define GFX8_SGVS_ENABLE_VERTEX_ID (1 << 15)
-# define GFX8_SGVS_VERTEX_ID_COMPONENT_SHIFT 13
-# define GFX8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0
-
-#define _3DSTATE_VF_TOPOLOGY 0x784b /* GFX8+ */
-
-#define _3DSTATE_WM_CHROMAKEY 0x784c /* GFX8+ */
-
-#define _3DSTATE_URB_VS 0x7830 /* GFX7+ */
-#define _3DSTATE_URB_HS 0x7831 /* GFX7+ */
-#define _3DSTATE_URB_DS 0x7832 /* GFX7+ */
-#define _3DSTATE_URB_GS 0x7833 /* GFX7+ */
-# define GFX7_URB_ENTRY_SIZE_SHIFT 16
-# define GFX7_URB_STARTING_ADDRESS_SHIFT 25
-
-#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GFX7+ */
-#define _3DSTATE_PUSH_CONSTANT_ALLOC_HS 0x7913 /* GFX7+ */
-#define _3DSTATE_PUSH_CONSTANT_ALLOC_DS 0x7914 /* GFX7+ */
-#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GFX7+ */
-#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GFX7+ */
-# define GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
-
-#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GFX6+ */
-# define GFX6_CC_VIEWPORT_MODIFY (1 << 12)
-# define GFX6_SF_VIEWPORT_MODIFY (1 << 11)
-# define GFX6_CLIP_VIEWPORT_MODIFY (1 << 10)
-# define GFX6_NUM_VIEWPORTS 16
-
-#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GFX7+ */
-#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GFX7+ */
-
-#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GFX6+ */
-
-#define _3DSTATE_VS 0x7810 /* GFX6+ */
-/* DW2 */
-# define GFX6_VS_SPF_MODE (1 << 31)
-# define GFX6_VS_VECTOR_MASK_ENABLE (1 << 30)
-# define GFX6_VS_SAMPLER_COUNT_SHIFT 27
-# define GFX6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
-# define GFX6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
-# define GFX6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
-# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12)
-/* DW4 */
-# define GFX6_VS_DISPATCH_START_GRF_SHIFT 20
-# define GFX6_VS_URB_READ_LENGTH_SHIFT 11
-# define GFX6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
-/* DW5 */
-# define GFX6_VS_MAX_THREADS_SHIFT 25
-# define HSW_VS_MAX_THREADS_SHIFT 23
-# define GFX6_VS_STATISTICS_ENABLE (1 << 10)
-# define GFX6_VS_CACHE_DISABLE (1 << 1)
-# define GFX6_VS_ENABLE (1 << 0)
-/* Gfx8+ DW7 */
-# define GFX8_VS_SIMD8_ENABLE (1 << 2)
-/* Gfx8+ DW8 */
-# define GFX8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
-# define GFX8_VS_URB_OUTPUT_LENGTH_SHIFT 16
-# define GFX8_VS_USER_CLIP_DISTANCE_SHIFT 8
-
-#define _3DSTATE_GS 0x7811 /* GFX6+ */
-/* DW2 */
-# define GFX6_GS_SPF_MODE (1 << 31)
-# define GFX6_GS_VECTOR_MASK_ENABLE (1 << 30)
-# define GFX6_GS_SAMPLER_COUNT_SHIFT 27
-# define GFX6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
-# define GFX6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
-# define GFX6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
-# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12)
-/* DW4 */
-# define GFX7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23
-# define GFX7_GS_OUTPUT_TOPOLOGY_SHIFT 17
-# define GFX6_GS_URB_READ_LENGTH_SHIFT 11
-# define GFX7_GS_INCLUDE_VERTEX_HANDLES (1 << 10)
-# define GFX6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
-# define GFX6_GS_DISPATCH_START_GRF_SHIFT 0
-/* DW5 */
-# define GFX6_GS_MAX_THREADS_SHIFT 25
-# define HSW_GS_MAX_THREADS_SHIFT 24
-# define IVB_GS_CONTROL_DATA_FORMAT_SHIFT 24
-# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0
-# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
-# define GFX7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20
-# define GFX7_GS_INSTANCE_CONTROL_SHIFT 15
-# define GFX7_GS_DISPATCH_MODE_SHIFT 11
-# define GFX7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11)
-# define GFX6_GS_STATISTICS_ENABLE (1 << 10)
-# define GFX6_GS_SO_STATISTICS_ENABLE (1 << 9)
-# define GFX6_GS_RENDERING_ENABLE (1 << 8)
-# define GFX7_GS_INCLUDE_PRIMITIVE_ID (1 << 4)
-# define GFX7_GS_REORDER_TRAILING (1 << 2)
-# define GFX7_GS_ENABLE (1 << 0)
-/* DW6 */
-# define HSW_GS_CONTROL_DATA_FORMAT_SHIFT 31
-# define GFX6_GS_REORDER (1 << 30)
-# define GFX6_GS_DISCARD_ADJACENCY (1 << 29)
-# define GFX6_GS_SVBI_PAYLOAD_ENABLE (1 << 28)
-# define GFX6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27)
-# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16
-# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
-# define GFX6_GS_ENABLE (1 << 15)
-
-/* Gfx8+ DW8 */
-# define GFX8_GS_STATIC_OUTPUT (1 << 30)
-# define GFX8_GS_STATIC_VERTEX_COUNT_SHIFT 16
-# define GFX8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
-
-/* Gfx8+ DW9 */
-# define GFX8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
-# define GFX8_GS_URB_OUTPUT_LENGTH_SHIFT 16
-# define GFX8_GS_USER_CLIP_DISTANCE_SHIFT 8
-
-# define BRW_GS_EDGE_INDICATOR_0 (1 << 8)
-# define BRW_GS_EDGE_INDICATOR_1 (1 << 9)
-
-#define _3DSTATE_HS 0x781B /* GFX7+ */
-/* DW1 */
-# define GFX7_HS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
-# define GFX7_HS_SAMPLER_COUNT_SHIFT 27
-# define GFX7_HS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18)
-# define GFX7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
-# define GFX7_HS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
-# define GFX7_HS_FLOATING_POINT_MODE_ALT (1 << 16)
-# define GFX7_HS_MAX_THREADS_SHIFT 0
-/* DW2 */
-# define GFX7_HS_ENABLE (1 << 31)
-# define GFX7_HS_STATISTICS_ENABLE (1 << 29)
-# define GFX8_HS_MAX_THREADS_SHIFT 8
-# define GFX7_HS_INSTANCE_COUNT_MASK INTEL_MASK(3, 0)
-# define GFX7_HS_INSTANCE_COUNT_SHIFT 0
-/* DW5 */
-# define GFX7_HS_SINGLE_PROGRAM_FLOW (1 << 27)
-# define GFX7_HS_VECTOR_MASK_ENABLE (1 << 26)
-# define HSW_HS_ACCESSES_UAV (1 << 25)
-# define GFX7_HS_INCLUDE_VERTEX_HANDLES (1 << 24)
-# define GFX7_HS_DISPATCH_START_GRF_MASK INTEL_MASK(23, 19)
-# define GFX7_HS_DISPATCH_START_GRF_SHIFT 19
-# define GFX7_HS_URB_READ_LENGTH_MASK INTEL_MASK(16, 11)
-# define GFX7_HS_URB_READ_LENGTH_SHIFT 11
-# define GFX7_HS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
-# define GFX7_HS_URB_ENTRY_READ_OFFSET_SHIFT 4
-
-#define _3DSTATE_TE 0x781C /* GFX7+ */
-/* DW1 */
-# define GFX7_TE_PARTITIONING_SHIFT 12
-# define GFX7_TE_OUTPUT_TOPOLOGY_SHIFT 8
-# define GFX7_TE_DOMAIN_SHIFT 4
-//# define GFX7_TE_MODE_SW (1 << 1)
-# define GFX7_TE_ENABLE (1 << 0)
-
-#define _3DSTATE_DS 0x781D /* GFX7+ */
-/* DW2 */
-# define GFX7_DS_SINGLE_DOMAIN_POINT_DISPATCH (1 << 31)
-# define GFX7_DS_VECTOR_MASK_ENABLE (1 << 30)
-# define GFX7_DS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
-# define GFX7_DS_SAMPLER_COUNT_SHIFT 27
-# define GFX7_DS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18)
-# define GFX7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
-# define GFX7_DS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
-# define GFX7_DS_FLOATING_POINT_MODE_ALT (1 << 16)
-# define HSW_DS_ACCESSES_UAV (1 << 14)
-/* DW4 */
-# define GFX7_DS_DISPATCH_START_GRF_MASK INTEL_MASK(24, 20)
-# define GFX7_DS_DISPATCH_START_GRF_SHIFT 20
-# define GFX7_DS_URB_READ_LENGTH_MASK INTEL_MASK(17, 11)
-# define GFX7_DS_URB_READ_LENGTH_SHIFT 11
-# define GFX7_DS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
-# define GFX7_DS_URB_ENTRY_READ_OFFSET_SHIFT 4
-/* DW5 */
-# define GFX7_DS_MAX_THREADS_SHIFT 25
-# define HSW_DS_MAX_THREADS_SHIFT 21
-# define GFX7_DS_STATISTICS_ENABLE (1 << 10)
-# define GFX7_DS_SIMD8_DISPATCH_ENABLE (1 << 3)
-# define GFX7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2)
-# define GFX7_DS_CACHE_DISABLE (1 << 1)
-# define GFX7_DS_ENABLE (1 << 0)
-/* Gfx8+ DW8 */
-# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21)
-# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
-# define GFX8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16)
-# define GFX8_DS_URB_OUTPUT_LENGTH_SHIFT 16
-# define GFX8_DS_USER_CLIP_DISTANCE_MASK INTEL_MASK(15, 8)
-# define GFX8_DS_USER_CLIP_DISTANCE_SHIFT 8
-# define GFX8_DS_USER_CULL_DISTANCE_MASK INTEL_MASK(7, 0)
-# define GFX8_DS_USER_CULL_DISTANCE_SHIFT 0
-
-
-#define _3DSTATE_CLIP 0x7812 /* GFX6+ */
-/* DW1 */
-# define GFX7_CLIP_WINDING_CW (0 << 20)
-# define GFX7_CLIP_WINDING_CCW (1 << 20)
-# define GFX7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19)
-# define GFX7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19)
-# define GFX7_CLIP_EARLY_CULL (1 << 18)
-# define GFX8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK (1 << 17)
-# define GFX7_CLIP_CULLMODE_BOTH (0 << 16)
-# define GFX7_CLIP_CULLMODE_NONE (1 << 16)
-# define GFX7_CLIP_CULLMODE_FRONT (2 << 16)
-# define GFX7_CLIP_CULLMODE_BACK (3 << 16)
-# define GFX6_CLIP_STATISTICS_ENABLE (1 << 10)
-/**
- * Just does cheap culling based on the clip distance. Bits must be
- * disjoint with USER_CLIP_CLIP_DISTANCE bits.
- */
-# define GFX6_USER_CLIP_CULL_DISTANCES_SHIFT 0
-/* DW2 */
-# define GFX6_CLIP_ENABLE (1 << 31)
-# define GFX6_CLIP_API_OGL (0 << 30)
-# define GFX6_CLIP_API_D3D (1 << 30)
-# define GFX6_CLIP_XY_TEST (1 << 28)
-# define GFX6_CLIP_Z_TEST (1 << 27)
-# define GFX6_CLIP_GB_TEST (1 << 26)
-/** 8-bit field of which user clip distances to clip against. */
-# define GFX6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
-# define GFX6_CLIP_MODE_NORMAL (0 << 13)
-# define GFX6_CLIP_MODE_REJECT_ALL (3 << 13)
-# define GFX6_CLIP_MODE_ACCEPT_ALL (4 << 13)
-# define GFX6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
-# define GFX6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8)
-# define GFX6_CLIP_TRI_PROVOKE_SHIFT 4
-# define GFX6_CLIP_LINE_PROVOKE_SHIFT 2
-# define GFX6_CLIP_TRIFAN_PROVOKE_SHIFT 0
-/* DW3 */
-# define GFX6_CLIP_MIN_POINT_WIDTH_SHIFT 17
-# define GFX6_CLIP_MAX_POINT_WIDTH_SHIFT 6
-# define GFX6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
-# define GFX6_CLIP_MAX_VP_INDEX_MASK INTEL_MASK(3, 0)
-
-#define _3DSTATE_SF 0x7813 /* GFX6+ */
-/* DW1 (for gfx6) */
-# define GFX6_SF_NUM_OUTPUTS_SHIFT 22
-# define GFX6_SF_SWIZZLE_ENABLE (1 << 21)
-# define GFX6_SF_POINT_SPRITE_UPPERLEFT (0 << 20)
-# define GFX6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
-# define GFX9_SF_LINE_WIDTH_SHIFT 12 /* U11.7 */
-# define GFX6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
-# define GFX6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
-/* DW2 */
-# define GFX6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
-# define GFX6_SF_STATISTICS_ENABLE (1 << 10)
-# define GFX6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
-# define GFX6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
-# define GFX6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
-# define GFX6_SF_FRONT_SOLID (0 << 5)
-# define GFX6_SF_FRONT_WIREFRAME (1 << 5)
-# define GFX6_SF_FRONT_POINT (2 << 5)
-# define GFX6_SF_BACK_SOLID (0 << 3)
-# define GFX6_SF_BACK_WIREFRAME (1 << 3)
-# define GFX6_SF_BACK_POINT (2 << 3)
-# define GFX6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
-# define GFX6_SF_WINDING_CCW (1 << 0)
-/* DW3 */
-# define GFX6_SF_LINE_AA_ENABLE (1 << 31)
-# define GFX6_SF_CULL_BOTH (0 << 29)
-# define GFX6_SF_CULL_NONE (1 << 29)
-# define GFX6_SF_CULL_FRONT (2 << 29)
-# define GFX6_SF_CULL_BACK (3 << 29)
-# define GFX6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
-# define GFX6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
-# define GFX6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
-# define GFX6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
-# define GFX6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
-# define GFX6_SF_SCISSOR_ENABLE (1 << 11)
-# define GFX6_SF_MSRAST_OFF_PIXEL (0 << 8)
-# define GFX6_SF_MSRAST_OFF_PATTERN (1 << 8)
-# define GFX6_SF_MSRAST_ON_PIXEL (2 << 8)
-# define GFX6_SF_MSRAST_ON_PATTERN (3 << 8)
-/* DW4 */
-# define GFX6_SF_TRI_PROVOKE_SHIFT 29
-# define GFX6_SF_LINE_PROVOKE_SHIFT 27
-# define GFX6_SF_TRIFAN_PROVOKE_SHIFT 25
-# define GFX6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
-# define GFX6_SF_LINE_AA_MODE_TRUE (1 << 14)
-# define GFX6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
-# define GFX6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
-# define GFX6_SF_USE_STATE_POINT_WIDTH (1 << 11)
-# define GFX6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
-/* DW5: depth offset constant */
-/* DW6: depth offset scale */
-/* DW7: depth offset clamp */
-/* DW8 */
-# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
-# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
-# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
-# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
-# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
-# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
-# define ATTRIBUTE_1_SOURCE_SHIFT 16
-# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
-# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
-# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
-# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
-# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
-# define ATTRIBUTE_CONST_0000 0
-# define ATTRIBUTE_CONST_0001_FLOAT 1
-# define ATTRIBUTE_CONST_1111_FLOAT 2
-# define ATTRIBUTE_CONST_PRIM_ID 3
-# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
-# define ATTRIBUTE_0_SOURCE_SHIFT 0
-
-# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
-# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
-# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
-# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
-# define ATTRIBUTE_SWIZZLE_SHIFT 6
-
-/* DW16: Point sprite texture coordinate enables */
-/* DW17: Constant interpolation enables */
-/* DW18: attr 0-7 wrap shortest enables */
-/* DW19: attr 8-16 wrap shortest enables */
-
-/* On GFX7, many fields of 3DSTATE_SF were split out into a new command:
- * 3DSTATE_SBE. The remaining fields live in different DWords, but retain
- * the same bit-offset. The only new field:
- */
-/* GFX7/DW1: */
-# define GFX7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
-/* GFX7/DW2: */
-# define HSW_SF_LINE_STIPPLE_ENABLE (1 << 14)
-
-# define GFX8_SF_SMOOTH_POINT_ENABLE (1 << 13)
-
-#define _3DSTATE_SBE 0x781F /* GFX7+ */
-/* DW1 */
-# define GFX8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29)
-# define GFX8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28)
-# define GFX7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
-# define GFX7_SBE_NUM_OUTPUTS_SHIFT 22
-# define GFX7_SBE_SWIZZLE_ENABLE (1 << 21)
-# define GFX7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
-# define GFX7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
-# define GFX7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
-# define GFX8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5
-/* DW2-9: Attribute setup (same as DW8-15 of gfx6 _3DSTATE_SF) */
-/* DW10: Point sprite texture coordinate enables */
-/* DW11: Constant interpolation enables */
-/* DW12: attr 0-7 wrap shortest enables */
-/* DW13: attr 8-16 wrap shortest enables */
-
-/* DW4-5: Attribute active components (gfx9) */
-#define GFX9_SBE_ACTIVE_COMPONENT_NONE 0
-#define GFX9_SBE_ACTIVE_COMPONENT_XY 1
-#define GFX9_SBE_ACTIVE_COMPONENT_XYZ 2
-#define GFX9_SBE_ACTIVE_COMPONENT_XYZW 3
-
-#define _3DSTATE_SBE_SWIZ 0x7851 /* GFX8+ */
-
-#define _3DSTATE_RASTER 0x7850 /* GFX8+ */
-/* DW1 */
-# define GFX9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE (1 << 26)
-# define GFX9_RASTER_CONSERVATIVE_RASTERIZATION_ENABLE (1 << 24)
-# define GFX8_RASTER_FRONT_WINDING_CCW (1 << 21)
-# define GFX8_RASTER_CULL_BOTH (0 << 16)
-# define GFX8_RASTER_CULL_NONE (1 << 16)
-# define GFX8_RASTER_CULL_FRONT (2 << 16)
-# define GFX8_RASTER_CULL_BACK (3 << 16)
-# define GFX8_RASTER_SMOOTH_POINT_ENABLE (1 << 13)
-# define GFX8_RASTER_API_MULTISAMPLE_ENABLE (1 << 12)
-# define GFX8_RASTER_LINE_AA_ENABLE (1 << 2)
-# define GFX8_RASTER_SCISSOR_ENABLE (1 << 1)
-# define GFX8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0)
-# define GFX9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE (1 << 0)
-
-/* Gfx8 BLEND_STATE */
-/* DW0 */
-#define GFX8_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31)
-#define GFX8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 30)
-#define GFX8_BLEND_ALPHA_TO_ONE_ENABLE (1 << 29)
-#define GFX8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE (1 << 28)
-#define GFX8_BLEND_ALPHA_TEST_ENABLE (1 << 27)
-#define GFX8_BLEND_ALPHA_TEST_FUNCTION_MASK INTEL_MASK(26, 24)
-#define GFX8_BLEND_ALPHA_TEST_FUNCTION_SHIFT 24
-#define GFX8_BLEND_COLOR_DITHER_ENABLE (1 << 23)
-#define GFX8_BLEND_X_DITHER_OFFSET_MASK INTEL_MASK(22, 21)
-#define GFX8_BLEND_X_DITHER_OFFSET_SHIFT 21
-#define GFX8_BLEND_Y_DITHER_OFFSET_MASK INTEL_MASK(20, 19)
-#define GFX8_BLEND_Y_DITHER_OFFSET_SHIFT 19
-/* DW1 + 2n */
-#define GFX8_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 31)
-#define GFX8_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(30, 26)
-#define GFX8_BLEND_SRC_BLEND_FACTOR_SHIFT 26
-#define GFX8_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(25, 21)
-#define GFX8_BLEND_DST_BLEND_FACTOR_SHIFT 21
-#define GFX8_BLEND_COLOR_BLEND_FUNCTION_MASK INTEL_MASK(20, 18)
-#define GFX8_BLEND_COLOR_BLEND_FUNCTION_SHIFT 18
-#define GFX8_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(17, 13)
-#define GFX8_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 13
-#define GFX8_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(12, 8)
-#define GFX8_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 8
-#define GFX8_BLEND_ALPHA_BLEND_FUNCTION_MASK INTEL_MASK(7, 5)
-#define GFX8_BLEND_ALPHA_BLEND_FUNCTION_SHIFT 5
-#define GFX8_BLEND_WRITE_DISABLE_ALPHA (1 << 3)
-#define GFX8_BLEND_WRITE_DISABLE_RED (1 << 2)
-#define GFX8_BLEND_WRITE_DISABLE_GREEN (1 << 1)
-#define GFX8_BLEND_WRITE_DISABLE_BLUE (1 << 0)
-/* DW1 + 2n + 1 */
-#define GFX8_BLEND_LOGIC_OP_ENABLE (1 << 31)
-#define GFX8_BLEND_LOGIC_OP_FUNCTION_MASK INTEL_MASK(30, 27)
-#define GFX8_BLEND_LOGIC_OP_FUNCTION_SHIFT 27
-#define GFX8_BLEND_PRE_BLEND_SRC_ONLY_CLAMP_ENABLE (1 << 4)
-#define GFX8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT (2 << 2)
-#define GFX8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE (1 << 1)
-#define GFX8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE (1 << 0)
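For illustration, a minimal sketch of how the shift/mask pairs above are intended to be combined when packing DW1 of a per-RT BLEND_STATE entry. It assumes the INTEL_MASK(high, low) helper defined earlier in this header; src_factor and dst_factor are hypothetical values, not part of the removed source.

   uint32_t dw1 = 0;
   dw1 |= GFX8_BLEND_COLOR_BUFFER_BLEND_ENABLE;
   /* Hypothetical blend factors; mask after shifting to stay in the field. */
   dw1 |= (src_factor << GFX8_BLEND_SRC_BLEND_FACTOR_SHIFT) &
          GFX8_BLEND_SRC_BLEND_FACTOR_MASK;
   dw1 |= (dst_factor << GFX8_BLEND_DST_BLEND_FACTOR_SHIFT) &
          GFX8_BLEND_DST_BLEND_FACTOR_MASK;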
-
-#define _3DSTATE_WM_HZ_OP 0x7852 /* GFX8+ */
-/* DW1 */
-# define GFX8_WM_HZ_STENCIL_CLEAR (1 << 31)
-# define GFX8_WM_HZ_DEPTH_CLEAR (1 << 30)
-# define GFX8_WM_HZ_DEPTH_RESOLVE (1 << 28)
-# define GFX8_WM_HZ_HIZ_RESOLVE (1 << 27)
-# define GFX8_WM_HZ_PIXEL_OFFSET_ENABLE (1 << 26)
-# define GFX8_WM_HZ_FULL_SURFACE_DEPTH_CLEAR (1 << 25)
-# define GFX8_WM_HZ_STENCIL_CLEAR_VALUE_MASK INTEL_MASK(23, 16)
-# define GFX8_WM_HZ_STENCIL_CLEAR_VALUE_SHIFT 16
-# define GFX8_WM_HZ_NUM_SAMPLES_MASK INTEL_MASK(15, 13)
-# define GFX8_WM_HZ_NUM_SAMPLES_SHIFT 13
-/* DW2 */
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_MASK INTEL_MASK(31, 16)
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_SHIFT 16
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MIN_MASK INTEL_MASK(15, 0)
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MIN_SHIFT 0
-/* DW3 */
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_MASK INTEL_MASK(31, 16)
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_SHIFT 16
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MAX_MASK INTEL_MASK(15, 0)
-# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MAX_SHIFT 0
-/* DW4 */
-# define GFX8_WM_HZ_SAMPLE_MASK_MASK INTEL_MASK(15, 0)
-# define GFX8_WM_HZ_SAMPLE_MASK_SHIFT 0
-
-
-#define _3DSTATE_PS_BLEND 0x784D /* GFX8+ */
-/* DW1 */
-# define GFX8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31)
-# define GFX8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30)
-# define GFX8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29)
-# define GFX8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24)
-# define GFX8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24
-# define GFX8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19)
-# define GFX8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19
-# define GFX8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14)
-# define GFX8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14
-# define GFX8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9)
-# define GFX8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9
-# define GFX8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8)
-# define GFX8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7)
-
-#define _3DSTATE_WM_DEPTH_STENCIL 0x784E /* GFX8+ */
-/* DW1 */
-# define GFX8_WM_DS_STENCIL_FAIL_OP_SHIFT 29
-# define GFX8_WM_DS_Z_FAIL_OP_SHIFT 26
-# define GFX8_WM_DS_Z_PASS_OP_SHIFT 23
-# define GFX8_WM_DS_BF_STENCIL_FUNC_SHIFT 20
-# define GFX8_WM_DS_BF_STENCIL_FAIL_OP_SHIFT 17
-# define GFX8_WM_DS_BF_Z_FAIL_OP_SHIFT 14
-# define GFX8_WM_DS_BF_Z_PASS_OP_SHIFT 11
-# define GFX8_WM_DS_STENCIL_FUNC_SHIFT 8
-# define GFX8_WM_DS_DEPTH_FUNC_SHIFT 5
-# define GFX8_WM_DS_DOUBLE_SIDED_STENCIL_ENABLE (1 << 4)
-# define GFX8_WM_DS_STENCIL_TEST_ENABLE (1 << 3)
-# define GFX8_WM_DS_STENCIL_BUFFER_WRITE_ENABLE (1 << 2)
-# define GFX8_WM_DS_DEPTH_TEST_ENABLE (1 << 1)
-# define GFX8_WM_DS_DEPTH_BUFFER_WRITE_ENABLE (1 << 0)
-/* DW2 */
-# define GFX8_WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24)
-# define GFX8_WM_DS_STENCIL_TEST_MASK_SHIFT 24
-# define GFX8_WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16)
-# define GFX8_WM_DS_STENCIL_WRITE_MASK_SHIFT 16
-# define GFX8_WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8)
-# define GFX8_WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8
-# define GFX8_WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0)
-# define GFX8_WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0
-/* DW3 */
-# define GFX9_WM_DS_STENCIL_REF_MASK INTEL_MASK(15, 8)
-# define GFX9_WM_DS_STENCIL_REF_SHIFT 8
-# define GFX9_WM_DS_BF_STENCIL_REF_MASK INTEL_MASK(7, 0)
-# define GFX9_WM_DS_BF_STENCIL_REF_SHIFT 0
-
-enum brw_pixel_shader_coverage_mask_mode {
- BRW_PSICMS_OFF = 0, /* PS does not use input coverage masks. */
- BRW_PSICMS_NORMAL = 1, /* Input Coverage masks based on outer conservatism
- * and factors in SAMPLE_MASK. If Pixel is
- * conservatively covered, all samples are enabled.
- */
-
- BRW_PSICMS_INNER = 2, /* Input Coverage masks based on inner conservatism
- * and factors in SAMPLE_MASK. If Pixel is
- * conservatively *FULLY* covered, all samples are
- * enabled.
- */
- BRW_PCICMS_DEPTH = 3,
-};
-
-#define _3DSTATE_PS_EXTRA 0x784F /* GFX8+ */
-/* DW1 */
-# define GFX8_PSX_PIXEL_SHADER_VALID (1 << 31)
-# define GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30)
-# define GFX8_PSX_OMASK_TO_RENDER_TARGET (1 << 29)
-# define GFX8_PSX_KILL_ENABLE (1 << 28)
-# define GFX8_PSX_COMPUTED_DEPTH_MODE_SHIFT 26
-# define GFX8_PSX_FORCE_COMPUTED_DEPTH (1 << 25)
-# define GFX8_PSX_USES_SOURCE_DEPTH (1 << 24)
-# define GFX8_PSX_USES_SOURCE_W (1 << 23)
-# define GFX8_PSX_ATTRIBUTE_ENABLE (1 << 8)
-# define GFX8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
-# define GFX8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
-# define GFX9_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
-# define GFX9_PSX_SHADER_PULLS_BARY (1 << 3)
-# define GFX8_PSX_SHADER_HAS_UAV (1 << 2)
-# define GFX8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
-# define GFX9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT 0
-
-#define _3DSTATE_WM 0x7814 /* GFX6+ */
-/* DW1: kernel pointer */
-/* DW2 */
-# define GFX6_WM_SPF_MODE (1 << 31)
-# define GFX6_WM_VECTOR_MASK_ENABLE (1 << 30)
-# define GFX6_WM_SAMPLER_COUNT_SHIFT 27
-# define GFX6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
-# define GFX6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
-# define GFX6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
-/* DW3: scratch space */
-/* DW4 */
-# define GFX6_WM_STATISTICS_ENABLE (1 << 31)
-# define GFX6_WM_DEPTH_CLEAR (1 << 30)
-# define GFX6_WM_DEPTH_RESOLVE (1 << 28)
-# define GFX6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
-# define GFX6_WM_DISPATCH_START_GRF_SHIFT_0 16
-# define GFX6_WM_DISPATCH_START_GRF_SHIFT_1 8
-# define GFX6_WM_DISPATCH_START_GRF_SHIFT_2 0
-/* DW5 */
-# define GFX6_WM_MAX_THREADS_SHIFT 25
-# define GFX6_WM_KILL_ENABLE (1 << 22)
-# define GFX6_WM_COMPUTED_DEPTH (1 << 21)
-# define GFX6_WM_USES_SOURCE_DEPTH (1 << 20)
-# define GFX6_WM_DISPATCH_ENABLE (1 << 19)
-# define GFX6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
-# define GFX6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
-# define GFX6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
-# define GFX6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
-# define GFX6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
-# define GFX6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
-# define GFX6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
-# define GFX6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
-# define GFX6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
-# define GFX6_WM_LINE_STIPPLE_ENABLE (1 << 11)
-# define GFX6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
-# define GFX6_WM_USES_SOURCE_W (1 << 8)
-# define GFX6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
-# define GFX6_WM_32_DISPATCH_ENABLE (1 << 2)
-# define GFX6_WM_16_DISPATCH_ENABLE (1 << 1)
-# define GFX6_WM_8_DISPATCH_ENABLE (1 << 0)
-/* DW6 */
-# define GFX6_WM_NUM_SF_OUTPUTS_SHIFT 20
-# define GFX6_WM_POSOFFSET_NONE (0 << 18)
-# define GFX6_WM_POSOFFSET_CENTROID (2 << 18)
-# define GFX6_WM_POSOFFSET_SAMPLE (3 << 18)
-# define GFX6_WM_POSITION_ZW_PIXEL (0 << 16)
-# define GFX6_WM_POSITION_ZW_CENTROID (2 << 16)
-# define GFX6_WM_POSITION_ZW_SAMPLE (3 << 16)
-# define GFX6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
-# define GFX6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
-# define GFX6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
-# define GFX6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
-# define GFX6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
-# define GFX6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
-# define GFX6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10
-# define GFX6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
-# define GFX6_WM_MSRAST_OFF_PIXEL (0 << 1)
-# define GFX6_WM_MSRAST_OFF_PATTERN (1 << 1)
-# define GFX6_WM_MSRAST_ON_PIXEL (2 << 1)
-# define GFX6_WM_MSRAST_ON_PATTERN (3 << 1)
-# define GFX6_WM_MSDISPMODE_PERSAMPLE (0 << 0)
-# define GFX6_WM_MSDISPMODE_PERPIXEL (1 << 0)
-/* DW7: kernel 1 pointer */
-/* DW8: kernel 2 pointer */
-
-#define _3DSTATE_CONSTANT_VS 0x7815 /* GFX6+ */
-#define _3DSTATE_CONSTANT_GS 0x7816 /* GFX6+ */
-#define _3DSTATE_CONSTANT_PS 0x7817 /* GFX6+ */
-# define GFX6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
-# define GFX6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
-# define GFX6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
-# define GFX6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
-
-#define _3DSTATE_CONSTANT_HS 0x7819 /* GFX7+ */
-#define _3DSTATE_CONSTANT_DS 0x781A /* GFX7+ */
-
-#define _3DSTATE_STREAMOUT 0x781e /* GFX7+ */
-/* DW1 */
-# define SO_FUNCTION_ENABLE (1 << 31)
-# define SO_RENDERING_DISABLE (1 << 30)
-/* This selects which incoming rendering stream goes down the pipeline. The
- * rendering stream is 0 if not defined by special cases in the GS state.
- */
-# define SO_RENDER_STREAM_SELECT_SHIFT 27
-# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27)
-/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
- */
-# define SO_REORDER_TRAILING (1 << 26)
-/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
-# define SO_STATISTICS_ENABLE (1 << 25)
-# define SO_BUFFER_ENABLE(n) (1 << (8 + (n)))
-/* DW2 */
-# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29
-# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29)
-# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24
-# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24)
-# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21
-# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21)
-# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16
-# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16)
-# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13
-# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13)
-# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8
-# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8)
-# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5
-# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5)
-# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
-# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
-
-/* 3DSTATE_WM for Gfx7 */
-/* DW1 */
-# define GFX7_WM_STATISTICS_ENABLE (1 << 31)
-# define GFX7_WM_DEPTH_CLEAR (1 << 30)
-# define GFX7_WM_DISPATCH_ENABLE (1 << 29)
-# define GFX7_WM_DEPTH_RESOLVE (1 << 28)
-# define GFX7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
-# define GFX7_WM_KILL_ENABLE (1 << 25)
-# define GFX7_WM_COMPUTED_DEPTH_MODE_SHIFT 23
-# define GFX7_WM_USES_SOURCE_DEPTH (1 << 20)
-# define GFX7_WM_EARLY_DS_CONTROL_NORMAL (0 << 21)
-# define GFX7_WM_EARLY_DS_CONTROL_PSEXEC (1 << 21)
-# define GFX7_WM_EARLY_DS_CONTROL_PREPS (2 << 21)
-# define GFX7_WM_USES_SOURCE_W (1 << 19)
-# define GFX7_WM_POSITION_ZW_PIXEL (0 << 17)
-# define GFX7_WM_POSITION_ZW_CENTROID (2 << 17)
-# define GFX7_WM_POSITION_ZW_SAMPLE (3 << 17)
-# define GFX7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11
-# define GFX7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
-# define GFX7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
-# define GFX7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
-# define GFX7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
-# define GFX7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
-# define GFX7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
-# define GFX7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
-# define GFX7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
-# define GFX7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
-# define GFX7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
-# define GFX7_WM_LINE_STIPPLE_ENABLE (1 << 3)
-# define GFX7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
-# define GFX7_WM_MSRAST_OFF_PIXEL (0 << 0)
-# define GFX7_WM_MSRAST_OFF_PATTERN (1 << 0)
-# define GFX7_WM_MSRAST_ON_PIXEL (2 << 0)
-# define GFX7_WM_MSRAST_ON_PATTERN (3 << 0)
-/* DW2 */
-# define GFX7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
-# define GFX7_WM_MSDISPMODE_PERPIXEL (1 << 31)
-# define HSW_WM_UAV_ONLY (1 << 30)
-
-#define _3DSTATE_PS 0x7820 /* GFX7+ */
-/* DW1: kernel pointer */
-/* DW2 */
-# define GFX7_PS_SPF_MODE (1 << 31)
-# define GFX7_PS_VECTOR_MASK_ENABLE (1 << 30)
-# define GFX7_PS_SAMPLER_COUNT_SHIFT 27
-# define GFX7_PS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
-# define GFX7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
-# define GFX7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
-# define GFX7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
-/* DW3: scratch space */
-/* DW4 */
-# define IVB_PS_MAX_THREADS_SHIFT 24
-# define HSW_PS_MAX_THREADS_SHIFT 23
-# define HSW_PS_SAMPLE_MASK_SHIFT 12
-# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
-# define GFX7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
-# define GFX7_PS_ATTRIBUTE_ENABLE (1 << 10)
-# define GFX7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
-# define GFX7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
-# define GFX7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
-# define GFX7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
-# define GFX9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6)
-# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5)
-# define GFX7_PS_POSOFFSET_NONE (0 << 3)
-# define GFX7_PS_POSOFFSET_CENTROID (2 << 3)
-# define GFX7_PS_POSOFFSET_SAMPLE (3 << 3)
-# define GFX7_PS_32_DISPATCH_ENABLE (1 << 2)
-# define GFX7_PS_16_DISPATCH_ENABLE (1 << 1)
-# define GFX7_PS_8_DISPATCH_ENABLE (1 << 0)
-/* DW5 */
-# define GFX7_PS_DISPATCH_START_GRF_SHIFT_0 16
-# define GFX7_PS_DISPATCH_START_GRF_SHIFT_1 8
-# define GFX7_PS_DISPATCH_START_GRF_SHIFT_2 0
-/* DW6: kernel 1 pointer */
-/* DW7: kernel 2 pointer */
-
-#define _3DSTATE_SAMPLE_MASK 0x7818 /* GFX6+ */
-
-#define _3DSTATE_DRAWING_RECTANGLE 0x7900
-#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
-#define _3DSTATE_CHROMA_KEY 0x7904
-#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GFX4-6 */
-#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
-#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
-#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
-#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
-#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
-
-#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
-/* DW1 */
-# define SVB_INDEX_SHIFT 29
-# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
-/* DW2: SVB index */
-/* DW3: SVB maximum index */
-
-#define _3DSTATE_MULTISAMPLE 0x790d /* GFX6+ */
-#define GFX8_3DSTATE_MULTISAMPLE 0x780d /* GFX8+ */
-/* DW1 */
-# define MS_PIXEL_LOCATION_CENTER (0 << 4)
-# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
-# define MS_NUMSAMPLES_1 (0 << 1)
-# define MS_NUMSAMPLES_2 (1 << 1)
-# define MS_NUMSAMPLES_4 (2 << 1)
-# define MS_NUMSAMPLES_8 (3 << 1)
-# define MS_NUMSAMPLES_16 (4 << 1)
-
-#define _3DSTATE_SAMPLE_PATTERN 0x791c
-
-#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
-#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
-
-#define GFX7_3DSTATE_CLEAR_PARAMS 0x7804
-#define GFX7_3DSTATE_DEPTH_BUFFER 0x7805
-#define GFX7_3DSTATE_STENCIL_BUFFER 0x7806
-# define HSW_STENCIL_ENABLED (1 << 31)
-#define GFX7_3DSTATE_HIER_DEPTH_BUFFER 0x7807
-
-#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */
-# define GFX5_DEPTH_CLEAR_VALID (1 << 15)
-/* DW1: depth clear value */
-/* DW2 */
-# define GFX7_DEPTH_CLEAR_VALID (1 << 0)
-
-#define _3DSTATE_SO_DECL_LIST 0x7917 /* GFX7+ */
-/* DW1 */
-# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12
-# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12)
-# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8
-# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8)
-# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4
-# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4)
-# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0
-# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0)
-/* DW2 */
-# define SO_NUM_ENTRIES_3_SHIFT 24
-# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24)
-# define SO_NUM_ENTRIES_2_SHIFT 16
-# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16)
-# define SO_NUM_ENTRIES_1_SHIFT 8
-# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8)
-# define SO_NUM_ENTRIES_0_SHIFT 0
-# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0)
-
-/* SO_DECL DW0 */
-# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12
-# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12)
-# define SO_DECL_HOLE_FLAG (1 << 11)
-# define SO_DECL_REGISTER_INDEX_SHIFT 4
-# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4)
-# define SO_DECL_COMPONENT_MASK_SHIFT 0
-# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0)
-
-#define _3DSTATE_SO_BUFFER 0x7918 /* GFX7+ */
-/* DW1 */
-# define GFX8_SO_BUFFER_ENABLE (1 << 31)
-# define SO_BUFFER_INDEX_SHIFT 29
-# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29)
-# define GFX8_SO_BUFFER_OFFSET_WRITE_ENABLE (1 << 21)
-# define GFX8_SO_BUFFER_OFFSET_ADDRESS_ENABLE (1 << 20)
-# define SO_BUFFER_PITCH_SHIFT 0
-# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0)
-/* DW2: start address */
-/* DW3: end address. */
-
-#define _3DSTATE_3D_MODE 0x791e
-# define SLICE_HASHING_TABLE_ENABLE (1 << 6)
-# define SLICE_HASHING_TABLE_ENABLE_MASK REG_MASK(1 << 6)
-
-#define _3DSTATE_SLICE_TABLE_STATE_POINTERS 0x7920
-
-#define CMD_MI_FLUSH 0x0200
-
-# define BLT_X_SHIFT 0
-# define BLT_X_MASK INTEL_MASK(15, 0)
-# define BLT_Y_SHIFT 16
-# define BLT_Y_MASK INTEL_MASK(31, 16)
-
-#define GFX5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2))
-/* DW0 */
-# define GFX5_MI_COUNTER_SET_0 (0 << 6)
-# define GFX5_MI_COUNTER_SET_1 (1 << 6)
-/* DW1 */
-# define MI_COUNTER_ADDRESS_GTT (1 << 0)
-/* DW2: a user-defined report ID (written to the buffer but can be anything) */
-
-#define GFX6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2))
-
-#define GFX8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2))
-
-/* Maximum number of entries that can be addressed using a binding table
- * pointer of type SURFTYPE_BUFFER
- */
-#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
-
-#define MEDIA_VFE_STATE 0x7000
-/* GFX7 DW2, GFX8+ DW3 */
-# define MEDIA_VFE_STATE_MAX_THREADS_SHIFT 16
-# define MEDIA_VFE_STATE_MAX_THREADS_MASK INTEL_MASK(31, 16)
-# define MEDIA_VFE_STATE_URB_ENTRIES_SHIFT 8
-# define MEDIA_VFE_STATE_URB_ENTRIES_MASK INTEL_MASK(15, 8)
-# define MEDIA_VFE_STATE_RESET_GTW_TIMER_SHIFT 7
-# define MEDIA_VFE_STATE_RESET_GTW_TIMER_MASK INTEL_MASK(7, 7)
-# define MEDIA_VFE_STATE_BYPASS_GTW_SHIFT 6
-# define MEDIA_VFE_STATE_BYPASS_GTW_MASK INTEL_MASK(6, 6)
-# define GFX7_MEDIA_VFE_STATE_GPGPU_MODE_SHIFT 2
-# define GFX7_MEDIA_VFE_STATE_GPGPU_MODE_MASK INTEL_MASK(2, 2)
-/* GFX7 DW4, GFX8+ DW5 */
-# define MEDIA_VFE_STATE_URB_ALLOC_SHIFT 16
-# define MEDIA_VFE_STATE_URB_ALLOC_MASK INTEL_MASK(31, 16)
-# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0
-# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
-
-#define MEDIA_CURBE_LOAD 0x7001
-#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
-/* GFX7 DW4, GFX8+ DW5 */
-# define MEDIA_CURBE_READ_LENGTH_SHIFT 16
-# define MEDIA_CURBE_READ_LENGTH_MASK INTEL_MASK(31, 16)
-# define MEDIA_CURBE_READ_OFFSET_SHIFT 0
-# define MEDIA_CURBE_READ_OFFSET_MASK INTEL_MASK(15, 0)
-/* GFX7 DW5, GFX8+ DW6 */
-# define MEDIA_BARRIER_ENABLE_SHIFT 21
-# define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21)
-# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT 16
-# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK INTEL_MASK(20, 16)
-# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
-# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
-# define GFX8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
-# define GFX8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
-/* GFX7 DW6, GFX8+ DW7 */
-# define CROSS_THREAD_READ_LENGTH_SHIFT 0
-# define CROSS_THREAD_READ_LENGTH_MASK INTEL_MASK(7, 0)
-#define MEDIA_STATE_FLUSH 0x7004
-#define GPGPU_WALKER 0x7105
-/* GFX7 DW0 */
-# define GFX7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
-# define GFX7_GPGPU_PREDICATE_ENABLE (1 << 8)
-/* GFX8+ DW2 */
-# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
-# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
-/* GFX7 DW2, GFX8+ DW4 */
-# define GPGPU_WALKER_SIMD_SIZE_SHIFT 30
-# define GPGPU_WALKER_SIMD_SIZE_MASK INTEL_MASK(31, 30)
-# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT 16
-# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK INTEL_MASK(21, 16)
-# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT 8
-# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK INTEL_MASK(31, 8)
-# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT 0
-# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK INTEL_MASK(5, 0)
-
-#define CMD_MI (0x0 << 29)
-#define CMD_2D (0x2 << 29)
-#define CMD_3D (0x3 << 29)
-
-#define MI_NOOP (CMD_MI | 0)
-
-#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
-
-#define MI_FLUSH (CMD_MI | (4 << 23))
-#define FLUSH_MAP_CACHE (1 << 0)
-#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
-
-#define MI_STORE_DATA_IMM (CMD_MI | (0x20 << 23))
-#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23))
-#define MI_LOAD_REGISTER_REG (CMD_MI | (0x2A << 23))
-
-#define MI_FLUSH_DW (CMD_MI | (0x26 << 23))
-
-#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23))
-# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
-# define MI_STORE_REGISTER_MEM_PREDICATE (1 << 21)
-
-/* Load a value from memory into a register. Only available on Gfx7+. */
-#define GFX7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
-# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
-
-/* Manipulate the predicate bit based on some register values. Only on Gfx7+ */
-#define GFX7_MI_PREDICATE (CMD_MI | (0xC << 23))
-# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
-# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
-# define MI_PREDICATE_LOADOP_LOADINV (3 << 6)
-# define MI_PREDICATE_COMBINEOP_SET (0 << 3)
-# define MI_PREDICATE_COMBINEOP_AND (1 << 3)
-# define MI_PREDICATE_COMBINEOP_OR (2 << 3)
-# define MI_PREDICATE_COMBINEOP_XOR (3 << 3)
-# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0)
-# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0)
-# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0)
-# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0)
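For illustration, the usual predication idiom these bits support, as a sketch only; it assumes MI_PREDICATE_SRC0 and MI_PREDICATE_SRC1 have already been loaded (e.g. via GFX7_MI_LOAD_REGISTER_MEM):

   /* Predicate ends up enabled when SRC0 != SRC1: compare the sources,
    * load the inverted result, and set the predicate from it.
    */
   uint32_t dw0 = GFX7_MI_PREDICATE |
                  MI_PREDICATE_LOADOP_LOADINV |
                  MI_PREDICATE_COMBINEOP_SET |
                  MI_PREDICATE_COMPAREOP_SRCS_EQUAL;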
-
-#define HSW_MI_MATH (CMD_MI | (0x1a << 23))
-
-#define MI_MATH_ALU2(opcode, operand1, operand2) \
- ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) | \
- ((MI_MATH_OPERAND_##operand2) << 0) )
-
-#define MI_MATH_ALU1(opcode, operand1) \
- ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) )
-
-#define MI_MATH_ALU0(opcode) \
- ( ((MI_MATH_OPCODE_##opcode) << 20) )
-
-#define MI_MATH_OPCODE_NOOP 0x000
-#define MI_MATH_OPCODE_LOAD 0x080
-#define MI_MATH_OPCODE_LOADINV 0x480
-#define MI_MATH_OPCODE_LOAD0 0x081
-#define MI_MATH_OPCODE_LOAD1 0x481
-#define MI_MATH_OPCODE_ADD 0x100
-#define MI_MATH_OPCODE_SUB 0x101
-#define MI_MATH_OPCODE_AND 0x102
-#define MI_MATH_OPCODE_OR 0x103
-#define MI_MATH_OPCODE_XOR 0x104
-#define MI_MATH_OPCODE_STORE 0x180
-#define MI_MATH_OPCODE_STOREINV 0x580
-
-#define MI_MATH_OPERAND_R0 0x00
-#define MI_MATH_OPERAND_R1 0x01
-#define MI_MATH_OPERAND_R2 0x02
-#define MI_MATH_OPERAND_R3 0x03
-#define MI_MATH_OPERAND_R4 0x04
-#define MI_MATH_OPERAND_SRCA 0x20
-#define MI_MATH_OPERAND_SRCB 0x21
-#define MI_MATH_OPERAND_ACCU 0x31
-#define MI_MATH_OPERAND_ZF 0x32
-#define MI_MATH_OPERAND_CF 0x33
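For illustration, assuming the standard MI_MATH ALU encoding these macros target, a four-instruction HSW_MI_MATH payload computing R2 = R0 - R1 could be assembled from the macros above:

   uint32_t alu[4];
   alu[0] = MI_MATH_ALU2(LOAD, SRCA, R0);    /* srca <- r0 */
   alu[1] = MI_MATH_ALU2(LOAD, SRCB, R1);    /* srcb <- r1 */
   alu[2] = MI_MATH_ALU0(SUB);               /* accu <- srca - srcb */
   alu[3] = MI_MATH_ALU2(STORE, R2, ACCU);   /* r2   <- accu */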
-
-#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
-
-#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22))
-
-#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
-
-#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22))
-
-#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
-# define XY_TEXT_BYTE_PACKED (1 << 16)
-
-/* BR00 */
-#define XY_BLT_WRITE_ALPHA (1 << 21)
-#define XY_BLT_WRITE_RGB (1 << 20)
-#define XY_SRC_TILED (1 << 15)
-#define XY_DST_TILED (1 << 11)
-
-/* BR00 */
-#define XY_FAST_SRC_TILED_64K (3 << 20)
-#define XY_FAST_SRC_TILED_Y (2 << 20)
-#define XY_FAST_SRC_TILED_X (1 << 20)
-
-#define XY_FAST_DST_TILED_64K (3 << 13)
-#define XY_FAST_DST_TILED_Y (2 << 13)
-#define XY_FAST_DST_TILED_X (1 << 13)
-
-/* BR13 */
-#define BR13_8 (0x0 << 24)
-#define BR13_565 (0x1 << 24)
-#define BR13_8888 (0x3 << 24)
-#define BR13_16161616 (0x4 << 24)
-#define BR13_32323232 (0x5 << 24)
-
-#define GFX6_SO_PRIM_STORAGE_NEEDED 0x2280
-#define GFX7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
-
-#define GFX6_SO_NUM_PRIMS_WRITTEN 0x2288
-#define GFX7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
-
-#define GFX7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4)
-
-#define TIMESTAMP 0x2358
-
-#define BCS_SWCTRL 0x22200
-# define BCS_SWCTRL_SRC_Y (1 << 0)
-# define BCS_SWCTRL_DST_Y (1 << 1)
-
-#define OACONTROL 0x2360
-# define OACONTROL_COUNTER_SELECT_SHIFT 2
-# define OACONTROL_ENABLE_COUNTERS (1 << 0)
-
-/* Auto-Draw / Indirect Registers */
-#define GFX7_3DPRIM_END_OFFSET 0x2420
-#define GFX7_3DPRIM_START_VERTEX 0x2430
-#define GFX7_3DPRIM_VERTEX_COUNT 0x2434
-#define GFX7_3DPRIM_INSTANCE_COUNT 0x2438
-#define GFX7_3DPRIM_START_INSTANCE 0x243C
-#define GFX7_3DPRIM_BASE_VERTEX 0x2440
-
-/* Auto-Compute / Indirect Registers */
-#define GFX7_GPGPU_DISPATCHDIMX 0x2500
-#define GFX7_GPGPU_DISPATCHDIMY 0x2504
-#define GFX7_GPGPU_DISPATCHDIMZ 0x2508
-
-#define GFX7_CACHE_MODE_0 0x7000
-#define GFX7_CACHE_MODE_1 0x7004
-# define GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
-# define GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT (1 << 9)
-# define GFX8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
-# define GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
-# define GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1)
-# define GFX8_HIZ_PMA_MASK_BITS \
- REG_MASK(GFX8_HIZ_NP_PMA_FIX_ENABLE | GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE)
-# define GFX11_DISABLE_REPACKING_FOR_COMPRESSION (1 << 15)
-
-#define GFX7_GT_MODE 0x7008
-# define GFX9_SUBSLICE_HASHING_8x8 (0 << 8)
-# define GFX9_SUBSLICE_HASHING_16x4 (1 << 8)
-# define GFX9_SUBSLICE_HASHING_8x4 (2 << 8)
-# define GFX9_SUBSLICE_HASHING_16x16 (3 << 8)
-# define GFX9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8)
-# define GFX9_SLICE_HASHING_NORMAL (0 << 11)
-# define GFX9_SLICE_HASHING_DISABLED (1 << 11)
-# define GFX9_SLICE_HASHING_32x16 (2 << 11)
-# define GFX9_SLICE_HASHING_32x32 (3 << 11)
-# define GFX9_SLICE_HASHING_MASK_BITS REG_MASK(3 << 11)
-
-/* Predicate registers */
-#define MI_PREDICATE_SRC0 0x2400
-#define MI_PREDICATE_SRC1 0x2408
-#define MI_PREDICATE_DATA 0x2410
-#define MI_PREDICATE_RESULT 0x2418
-#define MI_PREDICATE_RESULT_1 0x241C
-#define MI_PREDICATE_RESULT_2 0x2214
-
-#define HSW_CS_GPR(n) (0x2600 + (n) * 8)
-
-/* L3 cache control registers. */
-#define GFX7_L3SQCREG1 0xb010
-/* L3SQ general and high priority credit initialization. */
-# define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000
-# define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000
-# define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000
-# define GFX7_L3SQCREG1_CONV_DC_UC (1 << 24)
-# define GFX7_L3SQCREG1_CONV_IS_UC (1 << 25)
-# define GFX7_L3SQCREG1_CONV_C_UC (1 << 26)
-# define GFX7_L3SQCREG1_CONV_T_UC (1 << 27)
-
-#define GFX7_L3CNTLREG2 0xb020
-# define GFX7_L3CNTLREG2_SLM_ENABLE (1 << 0)
-# define GFX7_L3CNTLREG2_URB_ALLOC_SHIFT 1
-# define GFX7_L3CNTLREG2_URB_ALLOC_MASK INTEL_MASK(6, 1)
-# define GFX7_L3CNTLREG2_URB_LOW_BW (1 << 7)
-# define GFX7_L3CNTLREG2_ALL_ALLOC_SHIFT 8
-# define GFX7_L3CNTLREG2_ALL_ALLOC_MASK INTEL_MASK(13, 8)
-# define GFX7_L3CNTLREG2_RO_ALLOC_SHIFT 14
-# define GFX7_L3CNTLREG2_RO_ALLOC_MASK INTEL_MASK(19, 14)
-# define GFX7_L3CNTLREG2_RO_LOW_BW (1 << 20)
-# define GFX7_L3CNTLREG2_DC_ALLOC_SHIFT 21
-# define GFX7_L3CNTLREG2_DC_ALLOC_MASK INTEL_MASK(26, 21)
-# define GFX7_L3CNTLREG2_DC_LOW_BW (1 << 27)
-
-#define GFX7_L3CNTLREG3 0xb024
-# define GFX7_L3CNTLREG3_IS_ALLOC_SHIFT 1
-# define GFX7_L3CNTLREG3_IS_ALLOC_MASK INTEL_MASK(6, 1)
-# define GFX7_L3CNTLREG3_IS_LOW_BW (1 << 7)
-# define GFX7_L3CNTLREG3_C_ALLOC_SHIFT 8
-# define GFX7_L3CNTLREG3_C_ALLOC_MASK INTEL_MASK(13, 8)
-# define GFX7_L3CNTLREG3_C_LOW_BW (1 << 14)
-# define GFX7_L3CNTLREG3_T_ALLOC_SHIFT 15
-# define GFX7_L3CNTLREG3_T_ALLOC_MASK INTEL_MASK(20, 15)
-# define GFX7_L3CNTLREG3_T_LOW_BW (1 << 21)
-
-#define HSW_SCRATCH1 0xb038
-#define HSW_SCRATCH1_L3_ATOMIC_DISABLE (1 << 27)
-
-#define HSW_ROW_CHICKEN3 0xe49c
-#define HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE (1 << 6)
-
-#define GFX8_L3CNTLREG 0x7034
-# define GFX8_L3CNTLREG_SLM_ENABLE (1 << 0)
-# define GFX8_L3CNTLREG_URB_ALLOC_SHIFT 1
-# define GFX8_L3CNTLREG_URB_ALLOC_MASK INTEL_MASK(7, 1)
-# define GFX8_L3CNTLREG_RO_ALLOC_SHIFT 11
-# define GFX8_L3CNTLREG_RO_ALLOC_MASK INTEL_MASK(17, 11)
-# define GFX8_L3CNTLREG_DC_ALLOC_SHIFT 18
-# define GFX8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18)
-# define GFX8_L3CNTLREG_ALL_ALLOC_SHIFT 25
-# define GFX8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25)
-# define GFX8_L3CNTLREG_EDBC_NO_HANG (1 << 9)
-# define GFX11_L3CNTLREG_USE_FULL_WAYS (1 << 10)
-
-#define GFX10_CACHE_MODE_SS 0x0e420
-#define GFX10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
-
-#define INSTPM 0x20c0
-# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
-
-#define CS_DEBUG_MODE2 0x20d8 /* Gfx9+ */
-# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
-
-#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gfx9+ */
-# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7)
-# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
-# define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7)
-# define GFX11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE (1 << 11)
-
-#define HALF_SLICE_CHICKEN7 0xE194
-# define TEXEL_OFFSET_FIX_ENABLE (1 << 1)
-# define TEXEL_OFFSET_FIX_MASK REG_MASK(1 << 1)
-
-#define GFX11_SAMPLER_MODE 0xE18C
-# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5)
-# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5)
-
-#define CS_CHICKEN1 0x2580 /* Gfx9+ */
-# define GFX9_REPLAY_MODE_MIDBUFFER (0 << 0)
-# define GFX9_REPLAY_MODE_MIDOBJECT (1 << 0)
-# define GFX9_REPLAY_MODE_MASK REG_MASK(1 << 0)
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "compiler/glsl/ir_uniform.h"
-#include "compiler/glsl/shader_cache.h"
-#include "main/mtypes.h"
-#include "util/blob.h"
-#include "util/build_id.h"
-#include "util/debug.h"
-#include "util/disk_cache.h"
-#include "util/macros.h"
-#include "util/mesa-sha1.h"
-
-#include "compiler/brw_eu.h"
-#include "dev/intel_debug.h"
-
-#include "brw_context.h"
-#include "brw_program.h"
-#include "brw_cs.h"
-#include "brw_gs.h"
-#include "brw_state.h"
-#include "brw_vs.h"
-#include "brw_wm.h"
-
-static bool
-debug_enabled_for_stage(gl_shader_stage stage)
-{
- static const uint64_t stage_debug_flags[] = {
- DEBUG_VS, DEBUG_TCS, DEBUG_TES, DEBUG_GS, DEBUG_WM, DEBUG_CS,
- };
- assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_debug_flags));
- return INTEL_DEBUG(stage_debug_flags[stage]);
-}
-
-static void
-intel_shader_sha1(struct gl_program *prog, gl_shader_stage stage,
- void *key, unsigned char *out_sha1)
-{
- char sha1_buf[41];
- unsigned char sha1[20];
- char manifest[256];
- int offset = 0;
-
- _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
- offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf);
-
- _mesa_sha1_compute(key, brw_prog_key_size(stage), sha1);
- _mesa_sha1_format(sha1_buf, sha1);
- offset += snprintf(manifest + offset, sizeof(manifest) - offset,
- "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage),
- sha1_buf);
-
- _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
-}
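For reference, a sketch of the manifest text hashed above, derived from the snprintf calls (the 40-character strings are the formatted SHA-1s; the stage abbreviation varies per stage):

   /*   program: <40-hex-char sha1 of the linked program>
    *   VS_key: <40-hex-char sha1 of the stage key>
    * out_sha1 is then the SHA-1 of this manifest text.
    */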
-
-static bool
-read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
- gl_shader_stage stage, const uint8_t **program,
- struct brw_stage_prog_data *prog_data)
-{
- return
- brw_read_blob_program_data(binary, prog, stage, program, prog_data) &&
- (binary->current == binary->end);
-}
-
-static bool
-read_and_upload(struct brw_context *brw, struct disk_cache *cache,
- struct gl_program *prog, gl_shader_stage stage)
-{
- unsigned char binary_sha1[20];
-
- union brw_any_prog_key prog_key;
-
- switch (stage) {
- case MESA_SHADER_VERTEX:
- brw_vs_populate_key(brw, &prog_key.vs);
- break;
- case MESA_SHADER_TESS_CTRL:
- brw_tcs_populate_key(brw, &prog_key.tcs);
- break;
- case MESA_SHADER_TESS_EVAL:
- brw_tes_populate_key(brw, &prog_key.tes);
- break;
- case MESA_SHADER_GEOMETRY:
- brw_gs_populate_key(brw, &prog_key.gs);
- break;
- case MESA_SHADER_FRAGMENT:
- brw_wm_populate_key(brw, &prog_key.wm);
- break;
- case MESA_SHADER_COMPUTE:
- brw_cs_populate_key(brw, &prog_key.cs);
- break;
- default:
- unreachable("Unsupported stage!");
- }
-
-   /* We don't care which instance of the program this is for the disk-cache
-    * hash lookup, so set the id to 0 for the sha1 hashing; program_string_id
-    * will be set below.
- */
- prog_key.base.program_string_id = 0;
-
- intel_shader_sha1(prog, stage, &prog_key, binary_sha1);
-
- size_t buffer_size;
- uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size);
- if (buffer == NULL) {
- if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
- char sha1_buf[41];
- _mesa_sha1_format(sha1_buf, binary_sha1);
- fprintf(stderr, "No cached %s binary found for: %s\n",
- _mesa_shader_stage_to_abbrev(stage), sha1_buf);
- }
- return false;
- }
-
- if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
- char sha1_buf[41];
- _mesa_sha1_format(sha1_buf, binary_sha1);
- fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
- sha1_buf);
- }
-
- struct blob_reader binary;
- blob_reader_init(&binary, buffer, buffer_size);
-
- const uint8_t *program;
- struct brw_stage_prog_data *prog_data =
- ralloc_size(NULL, sizeof(union brw_any_prog_data));
- if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) {
-      /* Something very bad has gone wrong; discard the item from the cache
-       * and rebuild from source.
- */
- if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
- fprintf(stderr, "Error reading program from cache (invalid i965 "
- "cache item)\n");
- }
-
- disk_cache_remove(cache, binary_sha1);
- ralloc_free(prog_data);
- free(buffer);
- return false;
- }
-
- enum brw_cache_id cache_id;
- struct brw_stage_state *stage_state;
-
- switch (stage) {
- case MESA_SHADER_VERTEX:
- cache_id = BRW_CACHE_VS_PROG;
- stage_state = &brw->vs.base;
- break;
- case MESA_SHADER_TESS_CTRL:
- cache_id = BRW_CACHE_TCS_PROG;
- stage_state = &brw->tcs.base;
- break;
- case MESA_SHADER_TESS_EVAL:
- cache_id = BRW_CACHE_TES_PROG;
- stage_state = &brw->tes.base;
- break;
- case MESA_SHADER_GEOMETRY:
- cache_id = BRW_CACHE_GS_PROG;
- stage_state = &brw->gs.base;
- break;
- case MESA_SHADER_FRAGMENT:
- cache_id = BRW_CACHE_FS_PROG;
- stage_state = &brw->wm.base;
- break;
- case MESA_SHADER_COMPUTE:
- cache_id = BRW_CACHE_CS_PROG;
- stage_state = &brw->cs.base;
- break;
- default:
- unreachable("Unsupported stage!");
- }
-
- prog_key.base.program_string_id = brw_program(prog)->id;
-
- brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);
-
- if (unlikely(debug_enabled_for_stage(stage))) {
- fprintf(stderr, "NIR for %s program %d loaded from disk shader cache:\n",
- _mesa_shader_stage_to_abbrev(stage), brw_program(prog)->id);
- brw_program_deserialize_driver_blob(&brw->ctx, prog, stage);
- nir_shader *nir = prog->nir;
- nir_print_shader(nir, stderr);
- fprintf(stderr, "Native code for %s %s shader %s from disk cache:\n",
- nir->info.label ? nir->info.label : "unnamed",
- _mesa_shader_stage_to_string(nir->info.stage), nir->info.name);
- brw_disassemble_with_labels(&brw->screen->devinfo, program, 0,
- prog_data->program_size, stderr);
- }
-
- brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
- program, prog_data->program_size, prog_data,
- brw_prog_data_size(stage), &stage_state->prog_offset,
- &stage_state->prog_data);
-
- prog->program_written_to_cache = true;
-
- ralloc_free(prog_data);
- free(buffer);
-
- return true;
-}
-
-bool
-brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
-{
- struct disk_cache *cache = brw->ctx.Cache;
- if (cache == NULL)
- return false;
-
- struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
- if (prog == NULL)
- return false;
-
- if (prog->sh.data->spirv)
- return false;
-
- if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK)
- goto fail;
-
- if (!read_and_upload(brw, cache, prog, stage))
- goto fail;
-
- if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
- fprintf(stderr, "read gen program from cache\n");
- }
-
- return true;
-
-fail:
- prog->program_written_to_cache = false;
- if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
- fprintf(stderr, "falling back to nir %s.\n",
- _mesa_shader_stage_to_abbrev(prog->info.stage));
- }
-
- brw_program_deserialize_driver_blob(&brw->ctx, prog, stage);
-
- return false;
-}
-
-static void
-write_program_data(struct brw_context *brw, struct gl_program *prog,
- void *key, struct brw_stage_prog_data *prog_data,
- uint32_t prog_offset, struct disk_cache *cache,
- gl_shader_stage stage)
-{
- struct blob binary;
- blob_init(&binary);
-
- const void *program_map = brw->cache.map + prog_offset;
-   /* TODO: Improve perf for non-LLC. It would be best to save the program at
-    * generation time, while it is still in normal, CPU-cacheable memory. An
-    * easier change would be to use _mesa_streaming_load_memcpy to read from
-    * the mapped program memory. */
- brw_write_blob_program_data(&binary, stage, program_map, prog_data);
-
- unsigned char sha1[20];
- char buf[41];
- intel_shader_sha1(prog, stage, key, sha1);
- _mesa_sha1_format(buf, sha1);
- if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
- fprintf(stderr, "putting binary in cache: %s\n", buf);
- }
-
- disk_cache_put(cache, sha1, binary.data, binary.size, NULL);
-
- prog->program_written_to_cache = true;
- blob_finish(&binary);
-}
-
-void
-brw_disk_cache_write_render_programs(struct brw_context *brw)
-{
- struct disk_cache *cache = brw->ctx.Cache;
- if (cache == NULL)
- return;
-
- struct gl_program *prog;
- gl_shader_stage stage;
- for (stage = MESA_SHADER_VERTEX; stage <= MESA_SHADER_FRAGMENT; stage++) {
- prog = brw->ctx._Shader->CurrentProgram[stage];
- if (prog && prog->sh.data->spirv)
- return;
- }
-
- prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
- if (prog && !prog->program_written_to_cache) {
- struct brw_vs_prog_key vs_key;
- brw_vs_populate_key(brw, &vs_key);
- vs_key.base.program_string_id = 0;
-
- write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
- brw->vs.base.prog_offset, cache,
- MESA_SHADER_VERTEX);
- }
-
- prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
- if (prog && !prog->program_written_to_cache) {
- struct brw_tcs_prog_key tcs_key;
- brw_tcs_populate_key(brw, &tcs_key);
- tcs_key.base.program_string_id = 0;
-
- write_program_data(brw, prog, &tcs_key, brw->tcs.base.prog_data,
- brw->tcs.base.prog_offset, cache,
- MESA_SHADER_TESS_CTRL);
- }
-
- prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
- if (prog && !prog->program_written_to_cache) {
- struct brw_tes_prog_key tes_key;
- brw_tes_populate_key(brw, &tes_key);
- tes_key.base.program_string_id = 0;
-
- write_program_data(brw, prog, &tes_key, brw->tes.base.prog_data,
- brw->tes.base.prog_offset, cache,
- MESA_SHADER_TESS_EVAL);
- }
-
- prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
- if (prog && !prog->program_written_to_cache) {
- struct brw_gs_prog_key gs_key;
- brw_gs_populate_key(brw, &gs_key);
- gs_key.base.program_string_id = 0;
-
- write_program_data(brw, prog, &gs_key, brw->gs.base.prog_data,
- brw->gs.base.prog_offset, cache,
- MESA_SHADER_GEOMETRY);
- }
-
- prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
- if (prog && !prog->program_written_to_cache) {
- struct brw_wm_prog_key wm_key;
- brw_wm_populate_key(brw, &wm_key);
- wm_key.base.program_string_id = 0;
-
- write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
- brw->wm.base.prog_offset, cache,
- MESA_SHADER_FRAGMENT);
- }
-}
-
-void
-brw_disk_cache_write_compute_program(struct brw_context *brw)
-{
- struct disk_cache *cache = brw->ctx.Cache;
- if (cache == NULL)
- return;
-
- struct gl_program *prog =
- brw->ctx._Shader->CurrentProgram[MESA_SHADER_COMPUTE];
-
- if (prog && prog->sh.data->spirv)
- return;
-
- if (prog && !prog->program_written_to_cache) {
- struct brw_cs_prog_key cs_key;
- brw_cs_populate_key(brw, &cs_key);
- cs_key.base.program_string_id = 0;
-
- write_program_data(brw, prog, &cs_key, brw->cs.base.prog_data,
- brw->cs.base.prog_offset, cache,
- MESA_SHADER_COMPUTE);
- }
-}
-
-void
-brw_disk_cache_init(struct brw_screen *screen)
-{
-#ifdef ENABLE_SHADER_CACHE
- if (INTEL_DEBUG(DEBUG_DISK_CACHE_DISABLE_MASK))
- return;
-
- /* array length: print length + null char + 1 extra to verify it is unused */
- char renderer[11];
- ASSERTED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
- screen->deviceID);
- assert(len == sizeof(renderer) - 2);
-
- const struct build_id_note *note =
- build_id_find_nhdr_for_addr(brw_disk_cache_init);
- assert(note && build_id_length(note) == 20 /* sha1 */);
-
- const uint8_t *id_sha1 = build_id_data(note);
- assert(id_sha1);
-
- char timestamp[41];
- _mesa_sha1_format(timestamp, id_sha1);
-
- const uint64_t driver_flags =
- brw_get_compiler_config_value(screen->compiler);
- screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
-#endif
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <sys/errno.h>
-
-#include "main/arrayobj.h"
-#include "main/blend.h"
-#include "main/context.h"
-#include "main/condrender.h"
-#include "main/samplerobj.h"
-#include "main/state.h"
-#include "main/enums.h"
-#include "main/macros.h"
-#include "main/transformfeedback.h"
-#include "main/framebuffer.h"
-#include "main/varray.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo.h"
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-#include "drivers/common/meta.h"
-#include "util/bitscan.h"
-#include "util/bitset.h"
-
-#include "brw_blorp.h"
-#include "brw_draw.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-#include "brw_context.h"
-#include "brw_state.h"
-
-#include "brw_batch.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-#include "brw_buffer_objects.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PRIMS
-
-
-static const GLenum reduced_prim[GL_POLYGON+1] = {
- [GL_POINTS] = GL_POINTS,
- [GL_LINES] = GL_LINES,
- [GL_LINE_LOOP] = GL_LINES,
- [GL_LINE_STRIP] = GL_LINES,
- [GL_TRIANGLES] = GL_TRIANGLES,
- [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
- [GL_TRIANGLE_FAN] = GL_TRIANGLES,
- [GL_QUADS] = GL_TRIANGLES,
- [GL_QUAD_STRIP] = GL_TRIANGLES,
- [GL_POLYGON] = GL_TRIANGLES
-};
-
-/* When the primitive changes, set a state bit and re-validate. This is not
- * the nicest approach; it would be better to make all the programs immune to
- * the active primitive (i.e. cope with all possibilities), but that may not
- * be realistic.
- */
-static void
-brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
-{
- struct gl_context *ctx = &brw->ctx;
- uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
-
- DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
-
- /* Slight optimization to avoid the GS program when not needed:
- */
- if (prim->mode == GL_QUAD_STRIP &&
- ctx->Light.ShadeModel != GL_FLAT &&
- ctx->Polygon.FrontMode == GL_FILL &&
- ctx->Polygon.BackMode == GL_FILL)
- hw_prim = _3DPRIM_TRISTRIP;
-
- if (prim->mode == GL_QUADS && prim->count == 4 &&
- ctx->Light.ShadeModel != GL_FLAT &&
- ctx->Polygon.FrontMode == GL_FILL &&
- ctx->Polygon.BackMode == GL_FILL) {
- hw_prim = _3DPRIM_TRIFAN;
- }
-
- if (hw_prim != brw->primitive) {
- brw->primitive = hw_prim;
- brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
-
- if (reduced_prim[prim->mode] != brw->reduced_primitive) {
- brw->reduced_primitive = reduced_prim[prim->mode];
- brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE;
- }
- }
-}
-
-static void
-gfx6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
-{
- const struct gl_context *ctx = &brw->ctx;
- uint32_t hw_prim;
-
- DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
-
- if (prim->mode == GL_PATCHES) {
- hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
- } else {
- hw_prim = get_hw_prim_for_gl_prim(prim->mode);
- }
-
- if (hw_prim != brw->primitive) {
- brw->primitive = hw_prim;
- brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
- if (prim->mode == GL_PATCHES)
- brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE;
- }
-}
-
-
-/**
- * The hardware is capable of removing dangling vertices on its own; however,
- * prior to Gfx6, we sometimes convert quads into trifans (and quad strips
- * into tristrips), since pre-Gfx6 hardware requires a GS to render quads.
- * This function manually trims dangling vertices from a draw call involving
- * quads so that those dangling vertices won't get drawn when we convert to
- * trifans/tristrips.
- */
-static GLuint
-trim(GLenum prim, GLuint length)
-{
- if (prim == GL_QUAD_STRIP)
- return length > 3 ? (length - length % 2) : 0;
- else if (prim == GL_QUADS)
- return length - length % 4;
- else
- return length;
-}
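A few worked values of the helper above, for illustration:

   /* trim(GL_QUADS, 10)     == 8   two dangling vertices dropped
    * trim(GL_QUAD_STRIP, 7) == 6   odd trailing vertex dropped
    * trim(GL_QUAD_STRIP, 3) == 0   not enough vertices for a full quad
    * trim(GL_TRIANGLES, 7)  == 7   other primitives pass through untouched
    */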
-
-
-static void
-brw_emit_prim(struct brw_context *brw,
- const struct _mesa_prim *prim,
- uint32_t hw_prim,
- bool is_indexed,
- GLuint num_instances, GLuint base_instance,
- struct brw_transform_feedback_object *xfb_obj,
- unsigned stream,
- bool is_indirect,
- GLsizeiptr indirect_offset)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- int verts_per_instance;
- int vertex_access_type;
- int indirect_flag;
-
- DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
- prim->start, prim->count);
-
- int start_vertex_location = prim->start;
- int base_vertex_location = prim->basevertex;
-
- if (is_indexed) {
- vertex_access_type = devinfo->ver >= 7 ?
- GFX7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
- GFX4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
- start_vertex_location += brw->ib.start_vertex_offset;
- base_vertex_location += brw->vb.start_vertex_bias;
- } else {
- vertex_access_type = devinfo->ver >= 7 ?
- GFX7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
- GFX4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
- start_vertex_location += brw->vb.start_vertex_bias;
- }
-
- /* We only need to trim the primitive count on pre-Gfx6. */
- if (devinfo->ver < 6)
- verts_per_instance = trim(prim->mode, prim->count);
- else
- verts_per_instance = prim->count;
-
- /* If nothing to emit, just return. */
- if (verts_per_instance == 0 && !is_indirect && !xfb_obj)
- return;
-
- /* If we're set to always flush, do it before and after the primitive emit.
-    * We want to catch both missed flushes that hurt the instruction/state
-    * cache and missed flushes of the render cache as it heads to other parts
-    * of the GPU besides the draw code.
- */
- if (brw->always_flush_cache)
- brw_emit_mi_flush(brw);
-
- /* If indirect, emit a bunch of loads from the indirect BO. */
- if (xfb_obj) {
- indirect_flag = GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE;
-
- brw_load_register_mem(brw, GFX7_3DPRIM_VERTEX_COUNT,
- xfb_obj->prim_count_bo,
- stream * sizeof(uint32_t));
- BEGIN_BATCH(9);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
- OUT_BATCH(GFX7_3DPRIM_INSTANCE_COUNT);
- OUT_BATCH(num_instances);
- OUT_BATCH(GFX7_3DPRIM_START_VERTEX);
- OUT_BATCH(0);
- OUT_BATCH(GFX7_3DPRIM_BASE_VERTEX);
- OUT_BATCH(0);
- OUT_BATCH(GFX7_3DPRIM_START_INSTANCE);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else if (is_indirect) {
- struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
- struct brw_bo *bo = brw_bufferobj_buffer(brw,
- brw_buffer_object(indirect_buffer),
- indirect_offset, 5 * sizeof(GLuint), false);
-
- indirect_flag = GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE;
-
- brw_load_register_mem(brw, GFX7_3DPRIM_VERTEX_COUNT, bo,
- indirect_offset + 0);
- brw_load_register_mem(brw, GFX7_3DPRIM_INSTANCE_COUNT, bo,
- indirect_offset + 4);
-
- brw_load_register_mem(brw, GFX7_3DPRIM_START_VERTEX, bo,
- indirect_offset + 8);
- if (is_indexed) {
- brw_load_register_mem(brw, GFX7_3DPRIM_BASE_VERTEX, bo,
- indirect_offset + 12);
- brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, bo,
- indirect_offset + 16);
- } else {
- brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, bo,
- indirect_offset + 12);
- brw_load_register_imm32(brw, GFX7_3DPRIM_BASE_VERTEX, 0);
- }
- } else {
- indirect_flag = 0;
- }
-
- BEGIN_BATCH(devinfo->ver >= 7 ? 7 : 6);
-
- if (devinfo->ver >= 7) {
- const int predicate_enable =
- (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
- ? GFX7_3DPRIM_PREDICATE_ENABLE : 0;
-
- OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
- OUT_BATCH(hw_prim | vertex_access_type);
- } else {
- OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
- hw_prim << GFX4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
- vertex_access_type);
- }
- OUT_BATCH(verts_per_instance);
- OUT_BATCH(start_vertex_location);
- OUT_BATCH(num_instances);
- OUT_BATCH(base_instance);
- OUT_BATCH(base_vertex_location);
- ADVANCE_BATCH();
-
- if (brw->always_flush_cache)
- brw_emit_mi_flush(brw);
-}
-
-
-static void
-brw_clear_buffers(struct brw_context *brw)
-{
- for (unsigned i = 0; i < brw->vb.nr_buffers; ++i) {
- brw_bo_unreference(brw->vb.buffers[i].bo);
- brw->vb.buffers[i].bo = NULL;
- }
- brw->vb.nr_buffers = 0;
-
- for (unsigned i = 0; i < brw->vb.nr_enabled; ++i) {
- brw->vb.enabled[i]->buffer = -1;
- }
-#ifndef NDEBUG
- for (unsigned i = 0; i < VERT_ATTRIB_MAX; i++) {
- assert(brw->vb.inputs[i].buffer == -1);
- }
-#endif
-}
-
-
-static uint8_t get_wa_flags(const struct gl_vertex_format *glformat)
-{
- uint8_t wa_flags = 0;
-
- switch (glformat->Type) {
- case GL_FIXED:
- wa_flags = glformat->Size;
- break;
-
- case GL_INT_2_10_10_10_REV:
- wa_flags |= BRW_ATTRIB_WA_SIGN;
- FALLTHROUGH;
-
- case GL_UNSIGNED_INT_2_10_10_10_REV:
- if (glformat->Format == GL_BGRA)
- wa_flags |= BRW_ATTRIB_WA_BGRA;
-
- if (glformat->Normalized)
- wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
- else if (!glformat->Integer)
- wa_flags |= BRW_ATTRIB_WA_SCALE;
-
- break;
- }
-
- return wa_flags;
-}
-
-
-static void
-brw_merge_inputs(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct gl_context *ctx = &brw->ctx;
-
- if (devinfo->verx10 <= 70) {
- /* Prior to Haswell, the hardware can't natively support GL_FIXED or
- * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
- */
- const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
- const uint64_t vs_inputs = ctx->VertexProgram._Current->info.inputs_read;
- assert((vs_inputs & ~((uint64_t)VERT_BIT_ALL)) == 0);
-
- unsigned vaomask = vs_inputs & _mesa_draw_array_bits(ctx);
- while (vaomask) {
- const gl_vert_attrib i = u_bit_scan(&vaomask);
- const uint8_t wa_flags =
- get_wa_flags(_mesa_draw_array_format(vao, i));
-
- if (brw->vb.attrib_wa_flags[i] != wa_flags) {
- brw->vb.attrib_wa_flags[i] = wa_flags;
- brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
- }
- }
-
- unsigned currmask = vs_inputs & _mesa_draw_current_bits(ctx);
- while (currmask) {
- const gl_vert_attrib i = u_bit_scan(&currmask);
- const uint8_t wa_flags =
- get_wa_flags(_mesa_draw_current_format(ctx, i));
-
- if (brw->vb.attrib_wa_flags[i] != wa_flags) {
- brw->vb.attrib_wa_flags[i] = wa_flags;
- brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS;
- }
- }
- }
-}
-
-/* Disable auxiliary buffers if a renderbuffer is also bound as a texture
- * or shader image. This causes a self-dependency, where both rendering
- * and sampling may concurrently read or write the CCS buffer, causing
- * incorrect pixels.
- */
-static bool
-brw_disable_rb_aux_buffer(struct brw_context *brw,
- bool *draw_aux_buffer_disabled,
- struct brw_mipmap_tree *tex_mt,
- unsigned min_level, unsigned num_levels,
- const char *usage)
-{
- const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
- bool found = false;
-
- /* We only need to worry about color compression and fast clears. */
- if (tex_mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
- tex_mt->aux_usage != ISL_AUX_USAGE_CCS_E)
- return false;
-
- for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
- const struct brw_renderbuffer *irb =
- brw_renderbuffer(fb->_ColorDrawBuffers[i]);
-
- if (irb && irb->mt->bo == tex_mt->bo &&
- irb->mt_level >= min_level &&
- irb->mt_level < min_level + num_levels) {
- found = draw_aux_buffer_disabled[i] = true;
- }
- }
-
- if (found) {
- perf_debug("Disabling CCS because a renderbuffer is also bound %s.\n",
- usage);
- }
-
- return found;
-}
-
-/** Implement the ASTC 5x5 sampler workaround
- *
- * Gfx9 sampling hardware has a bug where an ASTC 5x5 compressed surface
- * cannot live in the sampler cache at the same time as an aux compressed
- * surface. In order to work around the bug we have to stall rendering with a
- * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the
- * texture cache whenever one of ASTC 5x5 or aux compressed may be in the
- * sampler cache and we're about to render with something which samples from
- * the other.
- *
- * In the case of a single shader which textures from both ASTC 5x5 and
- * a texture which is CCS or HiZ compressed, we have to resolve the aux
- * compressed texture prior to rendering. This second part is handled in
- * brw_predraw_resolve_inputs() below.
- *
- * We have observed this issue to affect CCS and HiZ sampling but whether or
- * not it also affects MCS is unknown. Because MCS has no concept of a
- * resolve (and doing one would be stupid expensive), we choose to simply
- * ignore the possibility and hope for the best.
- */
-static void
-gfx9_apply_astc5x5_wa_flush(struct brw_context *brw,
- enum gfx9_astc5x5_wa_tex_type curr_mask)
-{
- assert(brw->screen->devinfo.ver == 9);
-
- if (((brw->gfx9_astc5x5_wa_tex_mask & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
- (curr_mask & GFX9_ASTC5X5_WA_TEX_TYPE_AUX)) ||
- ((brw->gfx9_astc5x5_wa_tex_mask & GFX9_ASTC5X5_WA_TEX_TYPE_AUX) &&
- (curr_mask & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
- }
-
- brw->gfx9_astc5x5_wa_tex_mask = curr_mask;
-}
-
-static enum gfx9_astc5x5_wa_tex_type
-gfx9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage)
-{
- if (aux_usage != ISL_AUX_USAGE_NONE &&
- aux_usage != ISL_AUX_USAGE_MCS)
- return GFX9_ASTC5X5_WA_TEX_TYPE_AUX;
-
- if (format == MESA_FORMAT_RGBA_ASTC_5x5 ||
- format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
- return GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
-
- return 0;
-}
-
-/* Helper for the gfx9 ASTC 5x5 workaround. This version exists for BLORP's
- * use-cases where only a single texture is bound.
- */
-void
-gfx9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
- mesa_format format,
- enum isl_aux_usage aux_usage)
-{
- gfx9_apply_astc5x5_wa_flush(brw, gfx9_astc5x5_wa_bits(format, aux_usage));
-}
-
-static void
-mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
- const struct gl_program *prog)
-{
- if (!prog)
- return;
-
- unsigned s;
- BITSET_FOREACH_SET(s, prog->info.textures_used_by_txf, 32)
- BITSET_SET(used_for_txf, prog->SamplerUnits[s]);
-}
-
-/**
- * \brief Resolve buffers before drawing.
- *
- * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
- * enabled depth texture, and flush the render cache for any dirty textures.
- */
-void
-brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
- bool *draw_aux_buffer_disabled)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_texture_object *tex_obj;
-
- BITSET_DECLARE(used_for_txf, MAX_COMBINED_TEXTURE_IMAGE_UNITS);
- memset(used_for_txf, 0, sizeof(used_for_txf));
- if (rendering) {
- mark_textures_used_for_txf(used_for_txf, ctx->VertexProgram._Current);
- mark_textures_used_for_txf(used_for_txf, ctx->TessCtrlProgram._Current);
- mark_textures_used_for_txf(used_for_txf, ctx->TessEvalProgram._Current);
- mark_textures_used_for_txf(used_for_txf, ctx->GeometryProgram._Current);
- mark_textures_used_for_txf(used_for_txf, ctx->FragmentProgram._Current);
- } else {
- mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
- }
-
- int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
-
- enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits = 0;
- if (brw->screen->devinfo.ver == 9) {
- /* In order to properly implement the ASTC 5x5 workaround for an
- * arbitrary draw or dispatch call, we have to walk the entire list of
- * textures looking for ASTC 5x5. If there is any ASTC 5x5 in this draw
- * call, all aux compressed textures must be resolved and have aux
- * compression disabled while sampling.
- */
- for (int i = 0; i <= maxEnabledUnit; i++) {
- if (!ctx->Texture.Unit[i]._Current)
- continue;
- tex_obj = brw_texture_object(ctx->Texture.Unit[i]._Current);
- if (!tex_obj || !tex_obj->mt)
- continue;
-
- astc5x5_wa_bits |= gfx9_astc5x5_wa_bits(tex_obj->_Format,
- tex_obj->mt->aux_usage);
- }
- gfx9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits);
- }
-
- /* Resolve depth buffer and render cache of each enabled texture. */
- for (int i = 0; i <= maxEnabledUnit; i++) {
- if (!ctx->Texture.Unit[i]._Current)
- continue;
- tex_obj = brw_texture_object(ctx->Texture.Unit[i]._Current);
- if (!tex_obj || !tex_obj->mt)
- continue;
-
- struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i);
- enum isl_format view_format =
- translate_tex_format(brw, tex_obj->_Format, sampler->Attrib.sRGBDecode);
-
- unsigned min_level, min_layer, num_levels, num_layers;
- if (tex_obj->base.Immutable) {
- min_level = tex_obj->base.Attrib.MinLevel;
- num_levels = MIN2(tex_obj->base.Attrib.NumLevels, tex_obj->_MaxLevel + 1);
- min_layer = tex_obj->base.Attrib.MinLayer;
- num_layers = tex_obj->base.Target != GL_TEXTURE_3D ?
- tex_obj->base.Attrib.NumLayers : INTEL_REMAINING_LAYERS;
- } else {
- min_level = tex_obj->base.Attrib.BaseLevel;
- num_levels = tex_obj->_MaxLevel - tex_obj->base.Attrib.BaseLevel + 1;
- min_layer = 0;
- num_layers = INTEL_REMAINING_LAYERS;
- }
-
- if (rendering) {
- brw_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled,
- tex_obj->mt, min_level, num_levels,
- "for sampling");
- }
-
- brw_miptree_prepare_texture(brw, tex_obj->mt, view_format,
- min_level, num_levels,
- min_layer, num_layers,
- astc5x5_wa_bits);
-
- /* If any programs are using it with texelFetch, we may need to also do
- * a prepare with an sRGB format to ensure texelFetch works "properly".
- */
- if (BITSET_TEST(used_for_txf, i)) {
- enum isl_format txf_format =
- translate_tex_format(brw, tex_obj->_Format, GL_DECODE_EXT);
- if (txf_format != view_format) {
- brw_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
- min_level, num_levels,
- min_layer, num_layers,
- astc5x5_wa_bits);
- }
- }
-
- brw_cache_flush_for_read(brw, tex_obj->mt->bo);
-
- if (tex_obj->base.StencilSampling ||
- tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
- brw_update_r8stencil(brw, tex_obj->mt);
- }
-
- if (brw_miptree_has_etc_shadow(brw, tex_obj->mt) &&
- tex_obj->mt->shadow_needs_update) {
- brw_miptree_update_etc_shadow_levels(brw, tex_obj->mt);
- }
- }
-
- /* Resolve color for each active shader image. */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];
-
- if (unlikely(prog && prog->info.num_images)) {
- for (unsigned j = 0; j < prog->info.num_images; j++) {
- struct gl_image_unit *u =
- &ctx->ImageUnits[prog->sh.ImageUnits[j]];
- tex_obj = brw_texture_object(u->TexObj);
-
- if (tex_obj && tex_obj->mt) {
- if (rendering) {
- brw_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled,
- tex_obj->mt, 0, ~0,
- "as a shader image");
- }
-
- brw_miptree_prepare_image(brw, tex_obj->mt);
-
- brw_cache_flush_for_read(brw, tex_obj->mt->bo);
- }
- }
- }
- }
-}
-
-static void
-brw_predraw_resolve_framebuffer(struct brw_context *brw,
- bool *draw_aux_buffer_disabled)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_renderbuffer *depth_irb;
-
- /* Resolve the depth buffer's HiZ buffer. */
- depth_irb = brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
- if (depth_irb && depth_irb->mt) {
- brw_miptree_prepare_depth(brw, depth_irb->mt,
- depth_irb->mt_level,
- depth_irb->mt_layer,
- depth_irb->layer_count);
- }
-
- /* Resolve color buffers for non-coherent framebuffer fetch. */
- if (!ctx->Extensions.EXT_shader_framebuffer_fetch &&
- ctx->FragmentProgram._Current &&
- ctx->FragmentProgram._Current->info.outputs_read) {
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- /* This is only used for non-coherent framebuffer fetch, so we don't
- * need to worry about CCS_E and can simply pass 'false' below.
- */
- assert(brw->screen->devinfo.ver < 9);
-
- for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
- const struct brw_renderbuffer *irb =
- brw_renderbuffer(fb->_ColorDrawBuffers[i]);
-
- if (irb) {
- brw_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
- irb->mt_level, 1,
- irb->mt_layer, irb->layer_count,
- brw->gfx9_astc5x5_wa_tex_mask);
- }
- }
- }
-
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
- struct brw_renderbuffer *irb =
- brw_renderbuffer(fb->_ColorDrawBuffers[i]);
-
- if (irb == NULL || irb->mt == NULL)
- continue;
-
- mesa_format mesa_format =
- _mesa_get_render_format(ctx, brw_rb_format(irb));
- enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
- bool blend_enabled = ctx->Color.BlendEnabled & (1 << i);
- enum isl_aux_usage aux_usage =
- brw_miptree_render_aux_usage(brw, irb->mt, isl_format,
- blend_enabled,
- draw_aux_buffer_disabled[i]);
- if (brw->draw_aux_usage[i] != aux_usage) {
- brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
- brw->draw_aux_usage[i] = aux_usage;
- }
-
- brw_miptree_prepare_render(brw, irb->mt, irb->mt_level,
- irb->mt_layer, irb->layer_count,
- aux_usage);
-
- brw_cache_flush_for_render(brw, irb->mt->bo,
- isl_format, aux_usage);
- }
-}
-
-/**
- * \brief Call this after drawing to mark which buffers need resolving
- *
- * If the depth buffer was written to and if it has an accompanying HiZ
- * buffer, then mark that it needs a depth resolve.
- *
- * If the stencil buffer was written to then mark that it may need to be
- * copied to an R8 texture.
- *
- * If the color buffer is a multisample window system buffer, then
- * mark that it needs a downsample.
- *
- * Also mark any render targets which will be textured as needing a render
- * cache flush.
- */
-static void
-brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- struct brw_renderbuffer *front_irb = NULL;
- struct brw_renderbuffer *back_irb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT);
- struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
- struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
-
- if (_mesa_is_front_buffer_drawing(fb))
- front_irb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
-
- if (front_irb)
- front_irb->need_downsample = true;
- if (back_irb)
- back_irb->need_downsample = true;
- if (depth_irb) {
- bool depth_written = brw_depth_writes_enabled(brw);
- if (depth_att->Layered) {
- brw_miptree_finish_depth(brw, depth_irb->mt,
- depth_irb->mt_level,
- depth_irb->mt_layer,
- depth_irb->layer_count,
- depth_written);
- } else {
- brw_miptree_finish_depth(brw, depth_irb->mt,
- depth_irb->mt_level,
- depth_irb->mt_layer, 1,
- depth_written);
- }
- if (depth_written)
- brw_depth_cache_add_bo(brw, depth_irb->mt->bo);
- }
-
- if (stencil_irb && brw->stencil_write_enabled) {
- struct brw_mipmap_tree *stencil_mt =
- stencil_irb->mt->stencil_mt != NULL ?
- stencil_irb->mt->stencil_mt : stencil_irb->mt;
- brw_depth_cache_add_bo(brw, stencil_mt->bo);
- brw_miptree_finish_write(brw, stencil_mt, stencil_irb->mt_level,
- stencil_irb->mt_layer,
- stencil_irb->layer_count, ISL_AUX_USAGE_NONE);
- }
-
- for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
- struct brw_renderbuffer *irb =
- brw_renderbuffer(fb->_ColorDrawBuffers[i]);
-
- if (!irb)
- continue;
-
- mesa_format mesa_format =
- _mesa_get_render_format(ctx, brw_rb_format(irb));
- enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
- enum isl_aux_usage aux_usage = brw->draw_aux_usage[i];
-
- brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage);
-
- brw_miptree_finish_render(brw, irb->mt, irb->mt_level,
- irb->mt_layer, irb->layer_count,
- aux_usage);
- }
-}
-
-static void
-brw_renderbuffer_move_temp_back(struct brw_context *brw,
- struct brw_renderbuffer *irb)
-{
- if (irb->align_wa_mt == NULL)
- return;
-
- brw_cache_flush_for_read(brw, irb->align_wa_mt->bo);
-
- brw_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0,
- irb->mt,
- irb->Base.Base.TexImage->Level, irb->mt_layer);
-
- brw_miptree_reference(&irb->align_wa_mt, NULL);
-
- /* Finally restore the x,y to correspond to full miptree. */
- brw_renderbuffer_set_draw_offset(irb);
-
- /* Make sure render surface state gets re-emitted with updated miptree. */
- brw->NewGLState |= _NEW_BUFFERS;
-}
-
-static void
-brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- struct brw_renderbuffer *depth_irb =
- brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_renderbuffer *stencil_irb =
- brw_get_renderbuffer(fb, BUFFER_STENCIL);
-
- if (depth_irb && depth_irb->align_wa_mt)
- brw_renderbuffer_move_temp_back(brw, depth_irb);
-
- if (stencil_irb && stencil_irb->align_wa_mt)
- brw_renderbuffer_move_temp_back(brw, stencil_irb);
-
- for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
- struct brw_renderbuffer *irb =
- brw_renderbuffer(fb->_ColorDrawBuffers[i]);
-
- if (!irb || irb->align_wa_mt == NULL)
- continue;
-
- brw_renderbuffer_move_temp_back(brw, irb);
- }
-}
-
-static void
-brw_prepare_drawing(struct gl_context *ctx,
- const struct _mesa_index_buffer *ib,
- bool index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
-{
- struct brw_context *brw = brw_context(ctx);
-
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- /* We have to validate the textures *before* checking for fallbacks;
- * otherwise, the software fallback won't be able to rely on the
- * texture state, the firstLevel and lastLevel fields won't be
- * set in the intel texture object (they'll both be 0), and the
- * software fallback will segfault if it attempts to access any
- * texture level other than level 0.
- */
- brw_validate_textures(brw);
-
- /* Find the highest sampler unit used by each shader program. A bit-count
- * won't work since ARB programs use the texture unit number as the sampler
- * index.
- */
- brw->wm.base.sampler_count =
- BITSET_LAST_BIT(ctx->FragmentProgram._Current->info.textures_used);
- brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
- BITSET_LAST_BIT(ctx->GeometryProgram._Current->info.textures_used) : 0;
- brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
- BITSET_LAST_BIT(ctx->TessEvalProgram._Current->info.textures_used) : 0;
- brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
- BITSET_LAST_BIT(ctx->TessCtrlProgram._Current->info.textures_used) : 0;
- brw->vs.base.sampler_count =
- BITSET_LAST_BIT(ctx->VertexProgram._Current->info.textures_used);
-
- brw_prepare_render(brw);
-
- /* This workaround has to happen outside of brw_upload_render_state()
- * because it may flush the batchbuffer for a blit, affecting the state
- * flags.
- */
- brw_workaround_depthstencil_alignment(brw, 0);
-
- /* Resolves must occur after updating renderbuffers, updating context state,
- * and finalizing textures but before setting up any hardware state for
- * this draw call.
- */
- bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS] = { };
- brw_predraw_resolve_inputs(brw, true, draw_aux_buffer_disabled);
- brw_predraw_resolve_framebuffer(brw, draw_aux_buffer_disabled);
-
- /* Bind all inputs, derive varying and size information:
- */
- brw_clear_buffers(brw);
- brw_merge_inputs(brw);
-
- brw->ib.ib = ib;
- brw->ctx.NewDriverState |= BRW_NEW_INDICES;
-
- brw->vb.index_bounds_valid = index_bounds_valid;
- brw->vb.min_index = min_index;
- brw->vb.max_index = max_index;
- brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
-}
-
-static void
-brw_finish_drawing(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
-
- if (brw->always_flush_batch)
- brw_batch_flush(brw);
-
- brw_program_cache_check_size(brw);
- brw_postdraw_reconcile_align_wa_slices(brw);
- brw_postdraw_set_buffers_need_resolve(brw);
-
- if (brw->draw.draw_params_count_bo) {
- brw_bo_unreference(brw->draw.draw_params_count_bo);
- brw->draw.draw_params_count_bo = NULL;
- }
-
- if (brw->draw.draw_params_bo) {
- brw_bo_unreference(brw->draw.draw_params_bo);
- brw->draw.draw_params_bo = NULL;
- }
-
- if (brw->draw.derived_draw_params_bo) {
- brw_bo_unreference(brw->draw.derived_draw_params_bo);
- brw->draw.derived_draw_params_bo = NULL;
- }
-}
-
-/**
- * Implement workarounds for preemption:
- * - WaDisableMidObjectPreemptionForGSLineStripAdj
- * - WaDisableMidObjectPreemptionForTrifanOrPolygon
- * - WaDisableMidObjectPreemptionForLineLoop
- * - WA#0798
- */
-static void
-gfx9_emit_preempt_wa(struct brw_context *brw,
- const struct _mesa_prim *prim, GLuint num_instances)
-{
- bool object_preemption = true;
- ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Only apply these workarounds for gfx9 */
- assert(devinfo->ver == 9);
-
- /* WaDisableMidObjectPreemptionForGSLineStripAdj
- *
- * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and
- * GS is enabled.
- */
- if (brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled)
- object_preemption = false;
-
- /* WaDisableMidObjectPreemptionForTrifanOrPolygon
- *
- * TriFan miscompare in Execlist Preemption test. Cut index that is on a
- * previous context. End the previous context, then resume another context
- * with a tri-fan or polygon, and the vertex count is corrupted. If we
- * preempt again we will cause corruption.
- *
- * WA: Disable mid-draw preemption when draw-call has a tri-fan.
- */
- if (brw->primitive == _3DPRIM_TRIFAN)
- object_preemption = false;
-
- /* WaDisableMidObjectPreemptionForLineLoop
- *
- * VF Stats Counters Missing a vertex when preemption enabled.
- *
- * WA: Disable mid-draw preemption when the draw uses a lineloop
- * topology.
- */
- if (brw->primitive == _3DPRIM_LINELOOP)
- object_preemption = false;
-
- /* WA#0798
- *
- * VF is corrupting GAFS data when preempted on an instance boundary and
- * replayed with instancing enabled.
- *
- * WA: Disable preemption when using instancing.
- */
- if (num_instances > 1)
- object_preemption = false;
-
- brw_enable_obj_preemption(brw, object_preemption);
-}
-
-/* May fail if out of video memory for texture or vbo upload, or on
- * fallback conditions.
- */
-static void
-brw_draw_single_prim(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- unsigned prim_id,
- bool is_indexed,
- GLuint num_instances, GLuint base_instance,
- struct brw_transform_feedback_object *xfb_obj,
- unsigned stream,
- GLsizeiptr indirect_offset)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- bool fail_next;
- bool is_indirect = brw->draw.draw_indirect_data != NULL;
-
- /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have
- * atoms that happen on every draw call.
- */
- brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;
-
- /* Flush the batch if the batch/state buffers are nearly full. We can
- * grow them if needed, but this is not free, so we'd like to avoid it.
- */
- brw_batch_require_space(brw, 1500);
- brw_require_statebuffer_space(brw, 2400);
- brw_batch_save_state(brw);
- fail_next = brw_batch_saved_state_is_empty(brw);
-
- if (brw->num_instances != num_instances ||
- brw->basevertex != prim->basevertex ||
- brw->baseinstance != base_instance) {
- brw->num_instances = num_instances;
- brw->basevertex = prim->basevertex;
- brw->baseinstance = base_instance;
- if (prim_id > 0) { /* For i == 0 we just did this before the loop */
- brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
- brw_clear_buffers(brw);
- }
- }
-
- /* Determine if we need to flag BRW_NEW_VERTICES for updating the
- * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
- * always flag if the shader uses one of the values. For direct draws,
- * we only flag if the values change.
- */
- const int new_firstvertex =
- is_indexed ? prim->basevertex : prim->start;
- const int new_baseinstance = base_instance;
- const struct brw_vs_prog_data *vs_prog_data =
- brw_vs_prog_data(brw->vs.base.prog_data);
- if (prim_id > 0) {
- const bool uses_draw_parameters =
- vs_prog_data->uses_firstvertex ||
- vs_prog_data->uses_baseinstance;
-
- if ((uses_draw_parameters && is_indirect) ||
- (vs_prog_data->uses_firstvertex &&
- brw->draw.params.firstvertex != new_firstvertex) ||
- (vs_prog_data->uses_baseinstance &&
- brw->draw.params.gl_baseinstance != new_baseinstance))
- brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
- }
-
- brw->draw.params.firstvertex = new_firstvertex;
- brw->draw.params.gl_baseinstance = new_baseinstance;
- brw_bo_unreference(brw->draw.draw_params_bo);
-
- if (is_indirect) {
- /* Point draw_params_bo at the indirect buffer. */
- brw->draw.draw_params_bo =
- brw_buffer_object(ctx->DrawIndirectBuffer)->buffer;
- brw_bo_reference(brw->draw.draw_params_bo);
- brw->draw.draw_params_offset =
- indirect_offset + (is_indexed ? 12 : 8);
- } else {
- /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
- * has to upload gl_BaseVertex and such if they're needed.
- */
- brw->draw.draw_params_bo = NULL;
- brw->draw.draw_params_offset = 0;
- }
-
- /* gl_DrawID always needs its own vertex buffer since it's not part of
- * the indirect parameter buffer. Same for is_indexed_draw, which shares
- * the buffer with gl_DrawID. If the program uses gl_DrawID, we need to
- * flag BRW_NEW_VERTICES. For the first iteration, we don't have valid
- * vs_prog_data, but we always flag BRW_NEW_VERTICES before the loop.
- */
- if (prim_id > 0 && vs_prog_data->uses_drawid)
- brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
-
- brw->draw.derived_params.gl_drawid = prim->draw_id;
- brw->draw.derived_params.is_indexed_draw = is_indexed ? ~0 : 0;
-
- brw_bo_unreference(brw->draw.derived_draw_params_bo);
- brw->draw.derived_draw_params_bo = NULL;
- brw->draw.derived_draw_params_offset = 0;
-
- if (devinfo->ver < 6)
- brw_set_prim(brw, prim);
- else
- gfx6_set_prim(brw, prim);
-
-retry:
-
- /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
- * that the state updated in the loop outside of this block is that in
- * *_set_prim or brw_batch_flush(), which only impacts
- * brw->ctx.NewDriverState.
- */
- if (brw->ctx.NewDriverState) {
- brw->batch.no_wrap = true;
- brw_upload_render_state(brw);
- }
-
- if (devinfo->ver == 9)
- gfx9_emit_preempt_wa(brw, prim, num_instances);
-
- brw_emit_prim(brw, prim, brw->primitive, is_indexed, num_instances,
- base_instance, xfb_obj, stream, is_indirect,
- indirect_offset);
-
- brw->batch.no_wrap = false;
-
- if (!brw_batch_has_aperture_space(brw, 0)) {
- if (!fail_next) {
- brw_batch_reset_to_saved(brw);
- brw_batch_flush(brw);
- fail_next = true;
- goto retry;
- } else {
- int ret = brw_batch_flush(brw);
- WARN_ONCE(ret == -ENOSPC,
- "i965: Single primitive emit exceeded "
- "available aperture space\n");
- }
- }
-
- /* Now that we know we haven't run out of aperture space, we can safely
- * reset the dirty bits.
- */
- if (brw->ctx.NewDriverState)
- brw_render_state_finished(brw);
-
- return;
-}
-
-
-
-void
-brw_draw_prims(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- unsigned nr_prims,
- const struct _mesa_index_buffer *ib,
- bool index_bounds_valid,
- bool primitive_restart,
- unsigned restart_index,
- unsigned min_index,
- unsigned max_index,
- unsigned num_instances,
- unsigned base_instance)
-{
- unsigned i;
- struct brw_context *brw = brw_context(ctx);
- int predicate_state = brw->predicate.state;
-
- if (!brw_check_conditional_render(brw))
- return;
-
- /* Handle primitive restart if needed */
- if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, num_instances,
- base_instance, primitive_restart,
- restart_index)) {
- /* The draw was handled, so we can exit now */
- return;
- }
-
- /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
- * won't support all the extensions we support.
- */
- if (ctx->RenderMode != GL_RENDER) {
- perf_debug("%s render mode not supported in hardware\n",
- _mesa_enum_to_string(ctx->RenderMode));
- _swsetup_Wakeup(ctx);
- _tnl_wakeup(ctx);
- _tnl_draw(ctx, prims, nr_prims, ib, index_bounds_valid,
- primitive_restart, restart_index, min_index,
- max_index, num_instances, base_instance);
- return;
- }
-
- /* If we're going to have to upload any of the user's vertex arrays, then
- * get the minimum and maximum of their index buffer so we know what range
- * to upload.
- */
- if (!index_bounds_valid && _mesa_draw_user_array_bits(ctx) != 0) {
- perf_debug("Scanning index buffer to compute index buffer bounds. "
- "Use glDrawRangeElements() to avoid this.\n");
- vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims,
- primitive_restart, restart_index);
- index_bounds_valid = true;
- }
-
- brw_prepare_drawing(ctx, ib, index_bounds_valid, min_index, max_index);
- /* Try drawing with the hardware, but don't do anything else if we can't
- * manage it. swrast doesn't support our featureset, so we can't fall back
- * to it.
- */
-
- for (i = 0; i < nr_prims; i++) {
- /* Implementation of ARB_indirect_parameters via predicates */
- if (brw->draw.draw_params_count_bo) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
-
- /* Upload the current draw count from the draw parameters buffer to
- * MI_PREDICATE_SRC0.
- */
- brw_load_register_mem(brw, MI_PREDICATE_SRC0,
- brw->draw.draw_params_count_bo,
- brw->draw.draw_params_count_offset);
- /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
- brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
- /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
- brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);
-
- BEGIN_BATCH(1);
- if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
- OUT_BATCH(GFX7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
- MI_PREDICATE_COMBINEOP_SET |
- MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
- } else {
- OUT_BATCH(GFX7_MI_PREDICATE |
- MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
- MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
- }
- ADVANCE_BATCH();
-
- brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
- }
-
- brw_draw_single_prim(ctx, &prims[i], i, ib != NULL, num_instances,
- base_instance, NULL, 0,
- brw->draw.draw_indirect_offset +
- brw->draw.draw_indirect_stride * i);
- }
-
- brw_finish_drawing(ctx);
- brw->predicate.state = predicate_state;
-}
-
-static void
-brw_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
- unsigned num_instances, unsigned stream,
- struct gl_transform_feedback_object *gl_xfb_obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *xfb_obj =
- (struct brw_transform_feedback_object *) gl_xfb_obj;
-
- if (!brw_check_conditional_render(brw))
- return;
-
- /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
- * won't support all the extensions we support.
- */
- if (ctx->RenderMode != GL_RENDER) {
- perf_debug("%s render mode not supported in hardware\n",
- _mesa_enum_to_string(ctx->RenderMode));
- /* swrast doesn't support DrawTransformFeedback. Nothing to do. */
- return;
- }
-
- brw_prepare_drawing(ctx, NULL, false, 0, ~0);
-
- struct _mesa_prim prim;
- memset(&prim, 0, sizeof(prim));
- prim.begin = 1;
- prim.end = 1;
- prim.mode = mode;
-
- /* Try drawing with the hardware, but don't do anything else if we can't
- * manage it. swrast doesn't support our featureset, so we can't fall back
- * to it.
- */
- brw_draw_single_prim(ctx, &prim, 0, false, num_instances, 0, xfb_obj,
- stream, 0);
- brw_finish_drawing(ctx);
-}
-
-void
-brw_draw_indirect_prims(struct gl_context *ctx,
- GLuint mode,
- struct gl_buffer_object *indirect_data,
- GLsizeiptr indirect_offset,
- unsigned draw_count,
- unsigned stride,
- struct gl_buffer_object *indirect_params,
- GLsizeiptr indirect_params_offset,
- const struct _mesa_index_buffer *ib,
- bool primitive_restart,
- unsigned restart_index)
-{
- struct brw_context *brw = brw_context(ctx);
- struct _mesa_prim *prim;
- GLsizei i;
-
- prim = calloc(draw_count, sizeof(*prim));
- if (prim == NULL) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s",
- (draw_count > 1) ? "Multi" : "",
- ib ? "Elements" : "Arrays",
- indirect_params ? "CountARB" : "");
- return;
- }
-
- brw->draw.draw_indirect_stride = stride;
- brw->draw.draw_indirect_offset = indirect_offset;
-
- prim[0].begin = 1;
- prim[draw_count - 1].end = 1;
- for (i = 0; i < draw_count; ++i) {
- prim[i].mode = mode;
- prim[i].draw_id = i;
- }
-
- if (indirect_params) {
- brw->draw.draw_params_count_bo =
- brw_buffer_object(indirect_params)->buffer;
- brw_bo_reference(brw->draw.draw_params_count_bo);
- brw->draw.draw_params_count_offset = indirect_params_offset;
- }
-
- brw->draw.draw_indirect_data = indirect_data;
-
- brw_draw_prims(ctx, prim, draw_count, ib, false, primitive_restart,
- restart_index, 0, ~0, 0, 0);
-
- brw->draw.draw_indirect_data = NULL;
- free(prim);
-}
-
-void
-brw_init_draw_functions(struct dd_function_table *functions)
-{
- /* Register our drawing function:
- */
- functions->Draw = brw_draw_prims;
- functions->DrawTransformFeedback = brw_draw_transform_feedback;
- functions->DrawIndirect = brw_draw_indirect_prims;
-}
-
-void
-brw_draw_init(struct brw_context *brw)
-{
- for (int i = 0; i < VERT_ATTRIB_MAX; i++)
- brw->vb.inputs[i].buffer = -1;
- brw->vb.nr_buffers = 0;
- brw->vb.nr_enabled = 0;
-}
-
-void
-brw_draw_destroy(struct brw_context *brw)
-{
- unsigned i;
-
- for (i = 0; i < brw->vb.nr_buffers; i++) {
- brw_bo_unreference(brw->vb.buffers[i].bo);
- brw->vb.buffers[i].bo = NULL;
- }
- brw->vb.nr_buffers = 0;
-
- for (i = 0; i < brw->vb.nr_enabled; i++) {
- brw->vb.enabled[i]->buffer = -1;
- }
- brw->vb.nr_enabled = 0;
-
- brw_bo_unreference(brw->ib.bo);
- brw->ib.bo = NULL;
-}
+++ /dev/null
-/*
- * Copyright 2005 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_DRAW_H
-#define BRW_DRAW_H
-
-#include "main/mtypes.h"
-#include "brw_bufmgr.h"
-
-struct brw_context;
-
-uint32_t *
-brw_emit_vertex_buffer_state(struct brw_context *brw,
- unsigned buffer_nr,
- struct brw_bo *bo,
- unsigned start_offset,
- unsigned end_offset,
- unsigned stride,
- unsigned step_rate,
- uint32_t *__map);
-
-#define EMIT_VERTEX_BUFFER_STATE(...) __map = \
- brw_emit_vertex_buffer_state(__VA_ARGS__, __map)
-
-void brw_draw_prims(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- unsigned nr_prims,
- const struct _mesa_index_buffer *ib,
- bool index_bounds_valid,
- bool primitive_restart,
- unsigned restart_index,
- unsigned min_index,
- unsigned max_index,
- unsigned num_instances,
- unsigned base_instance);
-
-void brw_init_draw_functions(struct dd_function_table *functions);
-void brw_draw_init( struct brw_context *brw );
-void brw_draw_destroy( struct brw_context *brw );
-
-void brw_prepare_shader_draw_parameters(struct brw_context *);
-
-/* brw_primitive_restart.c */
-GLboolean
-brw_handle_primitive_restart(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint num_instances, GLuint base_instance,
- bool primitive_restart,
- unsigned restart_index);
-
-void
-brw_draw_indirect_prims(struct gl_context *ctx,
- GLuint mode,
- struct gl_buffer_object *indirect_data,
- GLsizeiptr indirect_offset,
- unsigned draw_count,
- unsigned stride,
- struct gl_buffer_object *indirect_params,
- GLsizeiptr indirect_params_offset,
- const struct _mesa_index_buffer *ib,
- bool primitive_restart,
- unsigned restart_index);
-#endif
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/arrayobj.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/enums.h"
-#include "main/macros.h"
-#include "main/glformats.h"
-#include "nir.h"
-
-#include "brw_draw.h"
-#include "brw_defines.h"
-#include "brw_context.h"
-#include "brw_state.h"
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-
-static const GLuint double_types_float[5] = {
- 0,
- ISL_FORMAT_R64_FLOAT,
- ISL_FORMAT_R64G64_FLOAT,
- ISL_FORMAT_R64G64B64_FLOAT,
- ISL_FORMAT_R64G64B64A64_FLOAT
-};
-
-static const GLuint double_types_passthru[5] = {
- 0,
- ISL_FORMAT_R64_PASSTHRU,
- ISL_FORMAT_R64G64_PASSTHRU,
- ISL_FORMAT_R64G64B64_PASSTHRU,
- ISL_FORMAT_R64G64B64A64_PASSTHRU
-};
-
-static const GLuint float_types[5] = {
- 0,
- ISL_FORMAT_R32_FLOAT,
- ISL_FORMAT_R32G32_FLOAT,
- ISL_FORMAT_R32G32B32_FLOAT,
- ISL_FORMAT_R32G32B32A32_FLOAT
-};
-
-static const GLuint half_float_types[5] = {
- 0,
- ISL_FORMAT_R16_FLOAT,
- ISL_FORMAT_R16G16_FLOAT,
- ISL_FORMAT_R16G16B16_FLOAT,
- ISL_FORMAT_R16G16B16A16_FLOAT
-};
-
-static const GLuint fixed_point_types[5] = {
- 0,
- ISL_FORMAT_R32_SFIXED,
- ISL_FORMAT_R32G32_SFIXED,
- ISL_FORMAT_R32G32B32_SFIXED,
- ISL_FORMAT_R32G32B32A32_SFIXED,
-};
-
-static const GLuint uint_types_direct[5] = {
- 0,
- ISL_FORMAT_R32_UINT,
- ISL_FORMAT_R32G32_UINT,
- ISL_FORMAT_R32G32B32_UINT,
- ISL_FORMAT_R32G32B32A32_UINT
-};
-
-static const GLuint uint_types_norm[5] = {
- 0,
- ISL_FORMAT_R32_UNORM,
- ISL_FORMAT_R32G32_UNORM,
- ISL_FORMAT_R32G32B32_UNORM,
- ISL_FORMAT_R32G32B32A32_UNORM
-};
-
-static const GLuint uint_types_scale[5] = {
- 0,
- ISL_FORMAT_R32_USCALED,
- ISL_FORMAT_R32G32_USCALED,
- ISL_FORMAT_R32G32B32_USCALED,
- ISL_FORMAT_R32G32B32A32_USCALED
-};
-
-static const GLuint int_types_direct[5] = {
- 0,
- ISL_FORMAT_R32_SINT,
- ISL_FORMAT_R32G32_SINT,
- ISL_FORMAT_R32G32B32_SINT,
- ISL_FORMAT_R32G32B32A32_SINT
-};
-
-static const GLuint int_types_norm[5] = {
- 0,
- ISL_FORMAT_R32_SNORM,
- ISL_FORMAT_R32G32_SNORM,
- ISL_FORMAT_R32G32B32_SNORM,
- ISL_FORMAT_R32G32B32A32_SNORM
-};
-
-static const GLuint int_types_scale[5] = {
- 0,
- ISL_FORMAT_R32_SSCALED,
- ISL_FORMAT_R32G32_SSCALED,
- ISL_FORMAT_R32G32B32_SSCALED,
- ISL_FORMAT_R32G32B32A32_SSCALED
-};
-
-static const GLuint ushort_types_direct[5] = {
- 0,
- ISL_FORMAT_R16_UINT,
- ISL_FORMAT_R16G16_UINT,
- ISL_FORMAT_R16G16B16_UINT,
- ISL_FORMAT_R16G16B16A16_UINT
-};
-
-static const GLuint ushort_types_norm[5] = {
- 0,
- ISL_FORMAT_R16_UNORM,
- ISL_FORMAT_R16G16_UNORM,
- ISL_FORMAT_R16G16B16_UNORM,
- ISL_FORMAT_R16G16B16A16_UNORM
-};
-
-static const GLuint ushort_types_scale[5] = {
- 0,
- ISL_FORMAT_R16_USCALED,
- ISL_FORMAT_R16G16_USCALED,
- ISL_FORMAT_R16G16B16_USCALED,
- ISL_FORMAT_R16G16B16A16_USCALED
-};
-
-static const GLuint short_types_direct[5] = {
- 0,
- ISL_FORMAT_R16_SINT,
- ISL_FORMAT_R16G16_SINT,
- ISL_FORMAT_R16G16B16_SINT,
- ISL_FORMAT_R16G16B16A16_SINT
-};
-
-static const GLuint short_types_norm[5] = {
- 0,
- ISL_FORMAT_R16_SNORM,
- ISL_FORMAT_R16G16_SNORM,
- ISL_FORMAT_R16G16B16_SNORM,
- ISL_FORMAT_R16G16B16A16_SNORM
-};
-
-static const GLuint short_types_scale[5] = {
- 0,
- ISL_FORMAT_R16_SSCALED,
- ISL_FORMAT_R16G16_SSCALED,
- ISL_FORMAT_R16G16B16_SSCALED,
- ISL_FORMAT_R16G16B16A16_SSCALED
-};
-
-static const GLuint ubyte_types_direct[5] = {
- 0,
- ISL_FORMAT_R8_UINT,
- ISL_FORMAT_R8G8_UINT,
- ISL_FORMAT_R8G8B8_UINT,
- ISL_FORMAT_R8G8B8A8_UINT
-};
-
-static const GLuint ubyte_types_norm[5] = {
- 0,
- ISL_FORMAT_R8_UNORM,
- ISL_FORMAT_R8G8_UNORM,
- ISL_FORMAT_R8G8B8_UNORM,
- ISL_FORMAT_R8G8B8A8_UNORM
-};
-
-static const GLuint ubyte_types_scale[5] = {
- 0,
- ISL_FORMAT_R8_USCALED,
- ISL_FORMAT_R8G8_USCALED,
- ISL_FORMAT_R8G8B8_USCALED,
- ISL_FORMAT_R8G8B8A8_USCALED
-};
-
-static const GLuint byte_types_direct[5] = {
- 0,
- ISL_FORMAT_R8_SINT,
- ISL_FORMAT_R8G8_SINT,
- ISL_FORMAT_R8G8B8_SINT,
- ISL_FORMAT_R8G8B8A8_SINT
-};
-
-static const GLuint byte_types_norm[5] = {
- 0,
- ISL_FORMAT_R8_SNORM,
- ISL_FORMAT_R8G8_SNORM,
- ISL_FORMAT_R8G8B8_SNORM,
- ISL_FORMAT_R8G8B8A8_SNORM
-};
-
-static const GLuint byte_types_scale[5] = {
- 0,
- ISL_FORMAT_R8_SSCALED,
- ISL_FORMAT_R8G8_SSCALED,
- ISL_FORMAT_R8G8B8_SSCALED,
- ISL_FORMAT_R8G8B8A8_SSCALED
-};
-
-static GLuint
-double_types(int size, GLboolean doubles)
-{
- /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
- * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
- * 64-bit components are stored in the URB without any conversion."
- * Also included on BDW PRM, Volume 7, page 470, table "Source Element
- * Formats Supported in VF Unit"
- *
- * Previous PRMs don't include those references, so for gfx7 we can't use
- * PASSTHRU formats directly. But in any case, we prefer to return passthru
- * even then, because that reflects what we want to achieve, even if we
- * would need to work around it on gen < 8.
- */
- return (doubles
- ? double_types_passthru[size]
- : double_types_float[size]);
-}
-
-/**
- * Given vertex array type/size/format/normalized info, return
- * the appopriate hardware surface type.
- * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
- */
-unsigned
-brw_get_vertex_surface_type(struct brw_context *brw,
- const struct gl_vertex_format *glformat)
-{
- int size = glformat->Size;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const bool is_ivybridge_or_older =
- devinfo->verx10 < 70 || devinfo->platform == INTEL_PLATFORM_IVB;
-
- if (INTEL_DEBUG(DEBUG_VERTS))
- fprintf(stderr, "type %s size %d normalized %d\n",
- _mesa_enum_to_string(glformat->Type),
- glformat->Size, glformat->Normalized);
-
- if (glformat->Integer) {
- assert(glformat->Format == GL_RGBA); /* sanity check */
- switch (glformat->Type) {
- case GL_INT: return int_types_direct[size];
- case GL_SHORT:
- if (is_ivybridge_or_older && size == 3)
- return short_types_direct[4];
- else
- return short_types_direct[size];
- case GL_BYTE:
- if (is_ivybridge_or_older && size == 3)
- return byte_types_direct[4];
- else
- return byte_types_direct[size];
- case GL_UNSIGNED_INT: return uint_types_direct[size];
- case GL_UNSIGNED_SHORT:
- if (is_ivybridge_or_older && size == 3)
- return ushort_types_direct[4];
- else
- return ushort_types_direct[size];
- case GL_UNSIGNED_BYTE:
- if (is_ivybridge_or_older && size == 3)
- return ubyte_types_direct[4];
- else
- return ubyte_types_direct[size];
- default: unreachable("not reached");
- }
- } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
- return ISL_FORMAT_R11G11B10_FLOAT;
- } else if (glformat->Normalized) {
- switch (glformat->Type) {
- case GL_DOUBLE: return double_types(size, glformat->Doubles);
- case GL_FLOAT: return float_types[size];
- case GL_HALF_FLOAT:
- case GL_HALF_FLOAT_OES:
- if (devinfo->ver < 6 && size == 3)
- return half_float_types[4];
- else
- return half_float_types[size];
- case GL_INT: return int_types_norm[size];
- case GL_SHORT: return short_types_norm[size];
- case GL_BYTE: return byte_types_norm[size];
- case GL_UNSIGNED_INT: return uint_types_norm[size];
- case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
- case GL_UNSIGNED_BYTE:
- if (glformat->Format == GL_BGRA) {
- /* See GL_EXT_vertex_array_bgra */
- assert(size == 4);
- return ISL_FORMAT_B8G8R8A8_UNORM;
- }
- else {
- return ubyte_types_norm[size];
- }
- case GL_FIXED:
- if (devinfo->verx10 >= 75)
- return fixed_point_types[size];
-
- /* This produces GL_FIXED inputs as values between INT32_MIN and
- * INT32_MAX, which will be scaled down by 1/65536 by the VS.
- */
- return int_types_scale[size];
- /* See GL_ARB_vertex_type_2_10_10_10_rev.
- * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
- * like to use here, so upload everything as UINT and fix
- * it in the shader
- */
- case GL_INT_2_10_10_10_REV:
- assert(size == 4);
- if (devinfo->verx10 >= 75) {
- return glformat->Format == GL_BGRA
- ? ISL_FORMAT_B10G10R10A2_SNORM
- : ISL_FORMAT_R10G10B10A2_SNORM;
- }
- return ISL_FORMAT_R10G10B10A2_UINT;
- case GL_UNSIGNED_INT_2_10_10_10_REV:
- assert(size == 4);
- if (devinfo->verx10 >= 75) {
- return glformat->Format == GL_BGRA
- ? ISL_FORMAT_B10G10R10A2_UNORM
- : ISL_FORMAT_R10G10B10A2_UNORM;
- }
- return ISL_FORMAT_R10G10B10A2_UINT;
- default: unreachable("not reached");
- }
- }
- else {
- /* See GL_ARB_vertex_type_2_10_10_10_rev.
- * W/A: the hardware doesn't really support the formats we'd
- * like to use here, so upload everything as UINT and fix
- * it in the shader
- */
- if (glformat->Type == GL_INT_2_10_10_10_REV) {
- assert(size == 4);
- if (devinfo->verx10 >= 75) {
- return glformat->Format == GL_BGRA
- ? ISL_FORMAT_B10G10R10A2_SSCALED
- : ISL_FORMAT_R10G10B10A2_SSCALED;
- }
- return ISL_FORMAT_R10G10B10A2_UINT;
- } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
- assert(size == 4);
- if (devinfo->verx10 >= 75) {
- return glformat->Format == GL_BGRA
- ? ISL_FORMAT_B10G10R10A2_USCALED
- : ISL_FORMAT_R10G10B10A2_USCALED;
- }
- return ISL_FORMAT_R10G10B10A2_UINT;
- }
- assert(glformat->Format == GL_RGBA); /* sanity check */
- switch (glformat->Type) {
- case GL_DOUBLE: return double_types(size, glformat->Doubles);
- case GL_FLOAT: return float_types[size];
- case GL_HALF_FLOAT:
- case GL_HALF_FLOAT_OES:
- if (devinfo->ver < 6 && size == 3)
- return half_float_types[4];
- else
- return half_float_types[size];
- case GL_INT: return int_types_scale[size];
- case GL_SHORT: return short_types_scale[size];
- case GL_BYTE: return byte_types_scale[size];
- case GL_UNSIGNED_INT: return uint_types_scale[size];
- case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
- case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
- case GL_FIXED:
- if (devinfo->verx10 >= 75)
- return fixed_point_types[size];
-
- /* This produces GL_FIXED inputs as values between INT32_MIN and
- * INT32_MAX, which will be scaled down by 1/65536 by the VS.
- */
- return int_types_scale[size];
- default: unreachable("not reached");
- }
- }
-}
-
-static void
-copy_array_to_vbo_array(struct brw_context *brw,
- const uint8_t *const ptr, const int src_stride,
- int min, int max,
- struct brw_vertex_buffer *buffer,
- GLuint dst_stride)
-{
- const unsigned char *src = ptr + min * src_stride;
- int count = max - min + 1;
- GLuint size = count * dst_stride;
- uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride,
- &buffer->bo, &buffer->offset);
-
- /* The GL 4.5 spec says:
- * "If any enabled array’s buffer binding is zero when DrawArrays or
- * one of the other drawing commands defined in section 10.4 is called,
- * the result is undefined."
- *
- * In this case, leave the dst with undefined values.
- */
- if (ptr != NULL) {
- if (dst_stride == src_stride) {
- memcpy(dst, src, size);
- } else {
- while (count--) {
- memcpy(dst, src, dst_stride);
- src += src_stride;
- dst += dst_stride;
- }
- }
- }
- buffer->stride = dst_stride;
- buffer->size = size;
-}
-
-void
-brw_prepare_vertices(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_vs_prog_data *vs_prog_data =
- brw_vs_prog_data(brw->vs.base.prog_data);
- const uint64_t vs_inputs64 =
- nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read,
- vp->DualSlotInputs);
- assert((vs_inputs64 & ~(uint64_t)VERT_BIT_ALL) == 0);
- unsigned vs_inputs = (unsigned)vs_inputs64;
- unsigned int min_index = brw->vb.min_index + brw->basevertex;
- unsigned int max_index = brw->vb.max_index + brw->basevertex;
- int delta, j;
-
- /* _NEW_POLYGON
- *
- * On gfx6+, edge flags don't end up in the VUE (either in or out of the
- * VS). Instead, they're uploaded as the last vertex element, and the data
- * is passed sideband through the fixed function units. So, we need to
- * prepare the vertex buffer for it, but it's not present in inputs_read.
- */
- if (devinfo->ver >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL)) {
- vs_inputs |= VERT_BIT_EDGEFLAG;
- }
-
- if (0)
- fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);
-
- /* Accumulate the list of enabled arrays. */
- brw->vb.nr_enabled = 0;
-
- unsigned mask = vs_inputs;
- while (mask) {
- const gl_vert_attrib attr = u_bit_scan(&mask);
- struct brw_vertex_element *input = &brw->vb.inputs[attr];
- brw->vb.enabled[brw->vb.nr_enabled++] = input;
- }
- assert(brw->vb.nr_enabled <= VERT_ATTRIB_MAX);
-
- if (brw->vb.nr_enabled == 0)
- return;
-
- if (brw->vb.nr_buffers)
- return;
-
- j = 0;
- const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
-
- unsigned vbomask = vs_inputs & _mesa_draw_vbo_array_bits(ctx);
- while (vbomask) {
- const struct gl_vertex_buffer_binding *const glbinding =
- _mesa_draw_buffer_binding(vao, ffs(vbomask) - 1);
- const GLsizei stride = glbinding->Stride;
-
- assert(glbinding->BufferObj);
-
- /* Accumulate the range of a single vertex, start with inverted range */
- uint32_t vertex_range_start = ~(uint32_t)0;
- uint32_t vertex_range_end = 0;
-
- const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
- unsigned attrmask = vbomask & boundmask;
- /* Mark those attributes as processed */
- vbomask ^= attrmask;
- /* We can assume that we have an array for the binding */
- assert(attrmask);
- /* Walk attributes belonging to the binding */
- while (attrmask) {
- const gl_vert_attrib attr = u_bit_scan(&attrmask);
- const struct gl_array_attributes *const glattrib =
- _mesa_draw_array_attrib(vao, attr);
- const uint32_t rel_offset =
- _mesa_draw_attributes_relative_offset(glattrib);
- const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
-
- vertex_range_start = MIN2(vertex_range_start, rel_offset);
- vertex_range_end = MAX2(vertex_range_end, rel_end);
-
- struct brw_vertex_element *input = &brw->vb.inputs[attr];
- input->glformat = &glattrib->Format;
- input->buffer = j;
- input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
- input->offset = rel_offset;
- }
- assert(vertex_range_start <= vertex_range_end);
-
- struct brw_buffer_object *intel_buffer =
- brw_buffer_object(glbinding->BufferObj);
- struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-
- const uint32_t offset = _mesa_draw_binding_offset(glbinding);
-
- /* If nothing else is known take the buffer size and offset as a bound */
- uint32_t start = vertex_range_start;
- uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start;
- /* Check if we can get a more narrow range */
- if (glbinding->InstanceDivisor) {
- if (brw->num_instances) {
- const uint32_t vertex_size = vertex_range_end - vertex_range_start;
- start = vertex_range_start + stride * brw->baseinstance;
- range = (stride * ((brw->num_instances - 1) /
- glbinding->InstanceDivisor) +
- vertex_size);
- }
- } else {
- if (brw->vb.index_bounds_valid) {
- const uint32_t vertex_size = vertex_range_end - vertex_range_start;
- start = vertex_range_start + stride * min_index;
- range = (stride * (max_index - min_index) +
- vertex_size);
-
- /**
-              * Unreal Engine 4 has a bug in its usage of glDrawRangeElements,
-              * causing it to be called with the number of vertices in place
-              * of the "end" parameter (which specifies the maximum array index
-              * contained in indices).
-              *
-              * Since an unknown number of games are affected and we cannot
-              * identify whether a game is built with UE4, we are forced to
-              * make a blanket workaround, disregarding max_index in range
-              * calculations. Fortunately all such calls look like:
- * glDrawRangeElements(GL_TRIANGLES, 0, 3, 3, ...);
- * So we are able to narrow down this workaround.
- *
- * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2917
- */
- if (unlikely(max_index == 3 && min_index == 0 &&
- brw->draw.derived_params.is_indexed_draw)) {
- range = intel_buffer->Base.Size - offset - start;
- }
- }
- }
-
- buffer->offset = offset;
- buffer->size = start + range;
- buffer->stride = stride;
- buffer->step_rate = glbinding->InstanceDivisor;
-
- buffer->bo = brw_bufferobj_buffer(brw, intel_buffer, offset + start,
- range, false);
- brw_bo_reference(buffer->bo);
-
- j++;
- }
-
- /* If we need to upload all the arrays, then we can trim those arrays to
- * only the used elements [min_index, max_index] so long as we adjust all
- * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
- */
- brw->vb.start_vertex_bias = 0;
- delta = min_index;
- if ((vs_inputs & _mesa_draw_vbo_array_bits(ctx)) == 0) {
- brw->vb.start_vertex_bias = -delta;
- delta = 0;
- }
-
- unsigned usermask = vs_inputs & _mesa_draw_user_array_bits(ctx);
- while (usermask) {
- const struct gl_vertex_buffer_binding *const glbinding =
- _mesa_draw_buffer_binding(vao, ffs(usermask) - 1);
- const GLsizei stride = glbinding->Stride;
-
- assert(!glbinding->BufferObj);
- assert(brw->vb.index_bounds_valid);
-
- /* Accumulate the range of a single vertex, start with inverted range */
- uint32_t vertex_range_start = ~(uint32_t)0;
- uint32_t vertex_range_end = 0;
-
- const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
- unsigned attrmask = usermask & boundmask;
- /* Mark those attributes as processed */
- usermask ^= attrmask;
- /* We can assume that we have an array for the binding */
- assert(attrmask);
- /* Walk attributes belonging to the binding */
- while (attrmask) {
- const gl_vert_attrib attr = u_bit_scan(&attrmask);
- const struct gl_array_attributes *const glattrib =
- _mesa_draw_array_attrib(vao, attr);
- const uint32_t rel_offset =
- _mesa_draw_attributes_relative_offset(glattrib);
- const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
-
- vertex_range_start = MIN2(vertex_range_start, rel_offset);
- vertex_range_end = MAX2(vertex_range_end, rel_end);
-
- struct brw_vertex_element *input = &brw->vb.inputs[attr];
- input->glformat = &glattrib->Format;
- input->buffer = j;
- input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
- input->offset = rel_offset;
- }
- assert(vertex_range_start <= vertex_range_end);
-
- struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-
- const uint8_t *ptr = (const uint8_t*)_mesa_draw_binding_offset(glbinding);
- ptr += vertex_range_start;
- const uint32_t vertex_size = vertex_range_end - vertex_range_start;
- if (glbinding->Stride == 0) {
- /* If the source stride is zero, we just want to upload the current
- * attribute once and set the buffer's stride to 0. There's no need
- * to replicate it out.
- */
- copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size);
- } else if (glbinding->InstanceDivisor == 0) {
- copy_array_to_vbo_array(brw, ptr, stride, min_index,
- max_index, buffer, vertex_size);
- } else {
- /* This is an instanced attribute, since its InstanceDivisor
- * is not zero. Therefore, its data will be stepped after the
- * instanced draw has been run InstanceDivisor times.
- */
- uint32_t instanced_attr_max_index =
- (brw->num_instances - 1) / glbinding->InstanceDivisor;
- copy_array_to_vbo_array(brw, ptr, stride, 0,
- instanced_attr_max_index, buffer, vertex_size);
- }
- buffer->offset -= delta * buffer->stride + vertex_range_start;
- buffer->size += delta * buffer->stride + vertex_range_start;
- buffer->step_rate = glbinding->InstanceDivisor;
-
- j++;
- }
-
- /* Upload the current values */
- unsigned curmask = vs_inputs & _mesa_draw_current_bits(ctx);
- if (curmask) {
- /* For each attribute, upload the maximum possible size. */
- uint8_t data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
- uint8_t *cursor = data;
-
- do {
- const gl_vert_attrib attr = u_bit_scan(&curmask);
- const struct gl_array_attributes *const glattrib =
- _mesa_draw_current_attrib(ctx, attr);
- const unsigned size = glattrib->Format._ElementSize;
- const unsigned alignment = align(size, sizeof(GLdouble));
- memcpy(cursor, glattrib->Ptr, size);
- if (alignment != size)
- memset(cursor + size, 0, alignment - size);
-
- struct brw_vertex_element *input = &brw->vb.inputs[attr];
- input->glformat = &glattrib->Format;
- input->buffer = j;
- input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
- input->offset = cursor - data;
-
- cursor += alignment;
- } while (curmask);
-
- struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
- const unsigned size = cursor - data;
- brw_upload_data(&brw->upload, data, size, size,
- &buffer->bo, &buffer->offset);
- buffer->stride = 0;
- buffer->size = size;
- buffer->step_rate = 0;
-
- j++;
- }
- brw->vb.nr_buffers = j;
-}
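The current-values loop above packs every generic attribute into one upload buffer, padding each entry out to a GLdouble boundary so the offsets stay aligned for the largest possible component type. A minimal sketch of that packing step, assuming an 8-byte alignment (sizeof(GLdouble)):

    #include <stdint.h>
    #include <string.h>

    /* Round x up to the next multiple of a (a must be a power of two here). */
    #define ALIGN_POT(x, a) (((x) + (a) - 1) & ~((a) - 1))

    /* Append one attribute of 'size' bytes at 'cursor', zero-padding up to an
     * 8-byte boundary, and return the advanced cursor. */
    static uint8_t *
    append_padded_attrib(uint8_t *cursor, const void *src, unsigned size)
    {
       const unsigned aligned = ALIGN_POT(size, 8u);
       memcpy(cursor, src, size);
       memset(cursor + size, 0, aligned - size);
       return cursor + aligned;
    }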
-
-void
-brw_prepare_shader_draw_parameters(struct brw_context *brw)
-{
- const struct brw_vs_prog_data *vs_prog_data =
- brw_vs_prog_data(brw->vs.base.prog_data);
-
- /* For non-indirect draws, upload the shader draw parameters */
- if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
- brw->draw.draw_params_bo == NULL) {
- brw_upload_data(&brw->upload,
- &brw->draw.params, sizeof(brw->draw.params), 4,
- &brw->draw.draw_params_bo,
- &brw->draw.draw_params_offset);
- }
-
- if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
- brw_upload_data(&brw->upload,
- &brw->draw.derived_params, sizeof(brw->draw.derived_params), 4,
- &brw->draw.derived_draw_params_bo,
- &brw->draw.derived_draw_params_offset);
- }
-}
-
-static void
-brw_upload_indices(struct brw_context *brw)
-{
- const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
- GLuint ib_size;
- struct brw_bo *old_bo = brw->ib.bo;
- struct gl_buffer_object *bufferobj;
- GLuint offset;
- GLuint ib_type_size;
-
- if (index_buffer == NULL)
- return;
-
- ib_type_size = 1 << index_buffer->index_size_shift;
- ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
- index_buffer->obj->Size;
- bufferobj = index_buffer->obj;
-
- /* Turn into a proper VBO:
- */
- if (!bufferobj) {
- /* Get new bufferobj, offset:
- */
- brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
- &brw->ib.bo, &offset);
- brw->ib.size = brw->ib.bo->size;
- } else {
- offset = (GLuint) (unsigned long) index_buffer->ptr;
-
- struct brw_bo *bo =
- brw_bufferobj_buffer(brw, brw_buffer_object(bufferobj),
- offset, ib_size, false);
- if (bo != brw->ib.bo) {
- brw_bo_unreference(brw->ib.bo);
- brw->ib.bo = bo;
- brw->ib.size = bufferobj->Size;
- brw_bo_reference(bo);
- }
- }
-
- /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
- * the index buffer state when we're just moving the start index
- * of our drawing.
- */
- brw->ib.start_vertex_offset = offset / ib_type_size;
-
- if (brw->ib.bo != old_bo)
- brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-
- unsigned index_size = 1 << index_buffer->index_size_shift;
- if (index_size != brw->ib.index_size) {
- brw->ib.index_size = index_size;
- brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
- }
-
- /* We need to re-emit the index buffer state whenever the
- * cut index flag changes.
- */
- if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
- brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
- brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
- }
-}
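The start_vertex_offset computed above is simply the byte offset into the bound element buffer divided by the index size, so moving the start of a draw never forces the index buffer state to be re-uploaded. A self-contained sketch of that conversion, using the same index_size_shift encoding (illustrative only):

    #include <stdint.h>

    /* Convert a byte offset into an index buffer into an element offset.
     * index_size_shift is log2 of the index size in bytes:
     * 0 = GL_UNSIGNED_BYTE, 1 = GL_UNSIGNED_SHORT, 2 = GL_UNSIGNED_INT. */
    static uint32_t
    start_vertex_offset(uint32_t byte_offset, unsigned index_size_shift)
    {
       const uint32_t index_size = 1u << index_size_shift;
       return byte_offset / index_size;
    }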
-
-const struct brw_tracked_state brw_indices = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_INDICES,
- },
- .emit = brw_upload_indices,
-};
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/version.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-/**
- * Initializes the potential list of extensions if ctx == NULL, or actually
- * enables the extensions for a context.
- */
-void
-brw_init_extensions(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 4);
-
- ctx->Extensions.ARB_arrays_of_arrays = true;
- ctx->Extensions.ARB_buffer_storage = true;
- ctx->Extensions.ARB_clear_texture = true;
- ctx->Extensions.ARB_clip_control = true;
- ctx->Extensions.ARB_copy_image = true;
- ctx->Extensions.ARB_depth_buffer_float = true;
- ctx->Extensions.ARB_depth_clamp = true;
- ctx->Extensions.ARB_depth_texture = true;
- ctx->Extensions.ARB_draw_elements_base_vertex = true;
- ctx->Extensions.ARB_draw_instanced = true;
- ctx->Extensions.ARB_ES2_compatibility = true;
- ctx->Extensions.ARB_explicit_attrib_location = true;
- ctx->Extensions.ARB_explicit_uniform_location = true;
- ctx->Extensions.ARB_fragment_coord_conventions = true;
- ctx->Extensions.ARB_fragment_program = true;
- ctx->Extensions.ARB_fragment_program_shadow = true;
- ctx->Extensions.ARB_fragment_shader = true;
- ctx->Extensions.ARB_framebuffer_object = true;
- ctx->Extensions.ARB_half_float_vertex = true;
- ctx->Extensions.ARB_instanced_arrays = true;
- ctx->Extensions.ARB_internalformat_query = true;
- ctx->Extensions.ARB_internalformat_query2 = true;
- ctx->Extensions.ARB_map_buffer_range = true;
- ctx->Extensions.ARB_occlusion_query = true;
- ctx->Extensions.ARB_occlusion_query2 = true;
- ctx->Extensions.ARB_point_sprite = true;
- ctx->Extensions.ARB_polygon_offset_clamp = true;
- ctx->Extensions.ARB_seamless_cube_map = true;
- ctx->Extensions.ARB_shader_bit_encoding = true;
- ctx->Extensions.ARB_shader_draw_parameters = true;
- ctx->Extensions.ARB_shader_group_vote = true;
- ctx->Extensions.ARB_shader_texture_lod = true;
- ctx->Extensions.ARB_shading_language_packing = true;
- ctx->Extensions.ARB_shadow = true;
- ctx->Extensions.ARB_sync = true;
- ctx->Extensions.ARB_texture_border_clamp = true;
- ctx->Extensions.ARB_texture_compression_rgtc = true;
- ctx->Extensions.ARB_texture_cube_map = true;
- ctx->Extensions.ARB_texture_env_combine = true;
- ctx->Extensions.ARB_texture_env_crossbar = true;
- ctx->Extensions.ARB_texture_env_dot3 = true;
- ctx->Extensions.ARB_texture_filter_anisotropic = true;
- ctx->Extensions.ARB_texture_float = true;
- ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true;
- ctx->Extensions.ARB_texture_non_power_of_two = true;
- ctx->Extensions.ARB_texture_rg = true;
- ctx->Extensions.ARB_texture_rgb10_a2ui = true;
- ctx->Extensions.ARB_vertex_program = true;
- ctx->Extensions.ARB_vertex_shader = true;
- ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
- ctx->Extensions.ARB_vertex_type_10f_11f_11f_rev = true;
- ctx->Extensions.EXT_blend_color = true;
- ctx->Extensions.EXT_blend_equation_separate = true;
- ctx->Extensions.EXT_blend_func_separate = true;
- ctx->Extensions.EXT_blend_minmax = true;
- ctx->Extensions.EXT_color_buffer_half_float = true;
- ctx->Extensions.EXT_draw_buffers2 = true;
- ctx->Extensions.EXT_EGL_image_storage = true;
- ctx->Extensions.EXT_float_blend = true;
- ctx->Extensions.EXT_framebuffer_sRGB = true;
- ctx->Extensions.EXT_gpu_program_parameters = true;
- ctx->Extensions.EXT_packed_float = true;
- ctx->Extensions.EXT_pixel_buffer_object = true;
- ctx->Extensions.EXT_point_parameters = true;
- ctx->Extensions.EXT_provoking_vertex = true;
- ctx->Extensions.EXT_render_snorm = true;
- ctx->Extensions.EXT_sRGB = true;
- ctx->Extensions.EXT_stencil_two_side = true;
- ctx->Extensions.EXT_texture_array = true;
- ctx->Extensions.EXT_texture_env_dot3 = true;
- ctx->Extensions.EXT_texture_filter_anisotropic = true;
- ctx->Extensions.EXT_texture_integer = true;
- ctx->Extensions.EXT_texture_norm16 = true;
- ctx->Extensions.EXT_texture_shared_exponent = true;
- ctx->Extensions.EXT_texture_snorm = true;
- ctx->Extensions.EXT_texture_sRGB = true;
- ctx->Extensions.EXT_texture_sRGB_decode = true;
- ctx->Extensions.EXT_texture_sRGB_R8 = true;
- ctx->Extensions.EXT_texture_swizzle = true;
- ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true;
- ctx->Extensions.EXT_vertex_array_bgra = true;
- ctx->Extensions.KHR_robustness = true;
- ctx->Extensions.AMD_seamless_cubemap_per_texture = true;
- ctx->Extensions.APPLE_object_purgeable = true;
- ctx->Extensions.ATI_texture_env_combine3 = true;
- ctx->Extensions.MESA_framebuffer_flip_y = true;
- ctx->Extensions.NV_conditional_render = true;
- ctx->Extensions.NV_fog_distance = true;
- ctx->Extensions.NV_primitive_restart = true;
- ctx->Extensions.NV_texture_barrier = true;
- ctx->Extensions.NV_texture_env_combine4 = true;
- ctx->Extensions.NV_texture_rectangle = true;
- ctx->Extensions.TDFX_texture_compression_FXT1 = true;
- ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true;
- ctx->Extensions.OES_draw_texture = true;
- ctx->Extensions.OES_EGL_image = true;
- ctx->Extensions.OES_EGL_image_external = true;
- ctx->Extensions.OES_standard_derivatives = true;
- ctx->Extensions.OES_texture_float = true;
- ctx->Extensions.OES_texture_float_linear = true;
- ctx->Extensions.OES_texture_half_float = true;
- ctx->Extensions.OES_texture_half_float_linear = true;
-
- if (devinfo->ver >= 8)
- ctx->Const.GLSLVersion = 460;
- else if (devinfo->platform == INTEL_PLATFORM_HSW &&
- can_do_pipelined_register_writes(brw->screen))
- ctx->Const.GLSLVersion = 450;
- else if (devinfo->ver >= 7 && can_do_pipelined_register_writes(brw->screen))
- ctx->Const.GLSLVersion = 420;
- else if (devinfo->ver >= 6)
- ctx->Const.GLSLVersion = 330;
- else
- ctx->Const.GLSLVersion = 120;
-
- if (devinfo->ver >= 6)
- ctx->Const.GLSLVersionCompat = 130;
- else
- ctx->Const.GLSLVersionCompat = 120;
-
- _mesa_override_glsl_version(&ctx->Const);
-
- ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
- ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 130;
-
- if (devinfo->verx10 >= 45) {
- ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true;
- ctx->Extensions.KHR_blend_equation_advanced = true;
- }
-
- if (devinfo->ver >= 5) {
- ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130;
- ctx->Extensions.ARB_texture_query_lod = true;
- ctx->Extensions.EXT_timer_query = true;
- }
-
- if (devinfo->ver == 6)
- ctx->Extensions.ARB_transform_feedback2 = true;
-
- if (devinfo->ver >= 6) {
- ctx->Extensions.ARB_blend_func_extended =
- !driQueryOptionb(&brw->screen->optionCache, "disable_blend_func_extended");
- ctx->Extensions.ARB_conditional_render_inverted = true;
- ctx->Extensions.ARB_cull_distance = true;
- ctx->Extensions.ARB_draw_buffers_blend = true;
- if (ctx->API != API_OPENGL_COMPAT ||
- ctx->Const.AllowHigherCompatVersion)
- ctx->Extensions.ARB_enhanced_layouts = true;
- ctx->Extensions.ARB_ES3_compatibility = true;
- ctx->Extensions.ARB_fragment_layer_viewport = true;
- ctx->Extensions.ARB_pipeline_statistics_query = true;
- ctx->Extensions.ARB_sample_shading = true;
- ctx->Extensions.ARB_shading_language_420pack = true;
- if (ctx->API != API_OPENGL_COMPAT ||
- ctx->Const.AllowHigherCompatVersion) {
- ctx->Extensions.ARB_texture_buffer_object = true;
- ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
- ctx->Extensions.ARB_texture_buffer_range = true;
- }
- ctx->Extensions.ARB_texture_cube_map_array = true;
- ctx->Extensions.ARB_texture_gather = true;
- ctx->Extensions.ARB_texture_multisample = true;
- ctx->Extensions.ARB_uniform_buffer_object = true;
- ctx->Extensions.EXT_gpu_shader4 = true;
- ctx->Extensions.EXT_texture_shadow_lod = true;
-
- if (ctx->API != API_OPENGL_COMPAT ||
- ctx->Const.AllowHigherCompatVersion)
- ctx->Extensions.AMD_vertex_shader_layer = true;
- ctx->Extensions.EXT_framebuffer_multisample = true;
- ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
- ctx->Extensions.EXT_transform_feedback = true;
- ctx->Extensions.ARB_transform_feedback_overflow_query = true;
- ctx->Extensions.OES_depth_texture_cube_map = true;
- ctx->Extensions.OES_sample_variables = true;
-
- ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp;
- ctx->Extensions.EXT_disjoint_timer_query =
- ctx->Extensions.ARB_timer_query;
-
- /* Only enable this in core profile because geometry shaders are
- * required, and Mesa only supports geometry shaders in OpenGL 3.2 and
- * later. In this driver, that currently means Core profile.
- */
- if (ctx->API == API_OPENGL_CORE ||
- ctx->Const.AllowHigherCompatVersion) {
- ctx->Extensions.ARB_shader_viewport_layer_array = true;
- ctx->Extensions.ARB_viewport_array = true;
- ctx->Extensions.AMD_vertex_shader_viewport_index = true;
- }
- }
-
- brw->predicate.supported = false;
-
- if (devinfo->ver >= 7) {
- ctx->Extensions.ARB_conservative_depth = true;
- ctx->Extensions.ARB_derivative_control = true;
- ctx->Extensions.ARB_framebuffer_no_attachments = true;
- if (ctx->API != API_OPENGL_COMPAT ||
- ctx->Const.AllowHigherCompatVersion) {
- ctx->Extensions.ARB_gpu_shader5 = true;
- ctx->Extensions.ARB_gpu_shader_fp64 = true;
- }
- ctx->Extensions.ARB_shader_atomic_counters = true;
- ctx->Extensions.ARB_shader_atomic_counter_ops = true;
- ctx->Extensions.ARB_shader_clock = true;
- ctx->Extensions.ARB_shader_image_load_store = true;
- ctx->Extensions.ARB_shader_image_size = true;
- ctx->Extensions.ARB_shader_precision = true;
- ctx->Extensions.ARB_shader_texture_image_samples = true;
- if (ctx->API != API_OPENGL_COMPAT ||
- ctx->Const.AllowHigherCompatVersion)
- ctx->Extensions.ARB_tessellation_shader = true;
- ctx->Extensions.ARB_texture_compression_bptc = true;
- ctx->Extensions.ARB_texture_view = true;
- ctx->Extensions.ARB_shader_storage_buffer_object = true;
- ctx->Extensions.ARB_vertex_attrib_64bit = true;
- ctx->Extensions.EXT_shader_samples_identical = true;
- ctx->Extensions.OES_primitive_bounding_box = true;
- ctx->Extensions.OES_texture_buffer = true;
-
- if (can_do_pipelined_register_writes(brw->screen)) {
- ctx->Extensions.ARB_draw_indirect = true;
- ctx->Extensions.ARB_transform_feedback2 = true;
- ctx->Extensions.ARB_transform_feedback3 = true;
- ctx->Extensions.ARB_transform_feedback_instanced = true;
-
- if (can_do_compute_dispatch(brw->screen) &&
- ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) {
- ctx->Extensions.ARB_compute_shader = true;
- ctx->Extensions.ARB_ES3_1_compatibility =
- devinfo->verx10 >= 75;
- ctx->Extensions.NV_compute_shader_derivatives = true;
- ctx->Extensions.ARB_compute_variable_group_size = true;
- }
-
- if (can_do_predicate_writes(brw->screen)) {
- brw->predicate.supported = true;
- ctx->Extensions.ARB_indirect_parameters = true;
- }
- }
-
- ctx->Extensions.ARB_gl_spirv = true;
- ctx->Extensions.ARB_spirv_extensions = true;
- }
-
- if (devinfo->verx10 >= 75) {
- ctx->Extensions.ARB_stencil_texturing = true;
- ctx->Extensions.ARB_texture_stencil8 = true;
- ctx->Extensions.OES_geometry_shader = true;
- ctx->Extensions.OES_texture_cube_map_array = true;
- ctx->Extensions.OES_viewport_array = true;
- }
-
- if (devinfo->verx10 >= 75 || devinfo->platform == INTEL_PLATFORM_BYT) {
- ctx->Extensions.ARB_robust_buffer_access_behavior = true;
- }
-
- if (can_do_mi_math_and_lrr(brw->screen)) {
- ctx->Extensions.ARB_query_buffer_object = true;
- }
-
- if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) {
- /* For now, we can't enable OES_texture_view on Gen 7 because of
- * some piglit failures coming from
- * piglit/tests/spec/arb_texture_view/rendering-formats.c that need
- * investigation.
- */
- ctx->Extensions.OES_texture_view = true;
- }
-
- if (devinfo->ver >= 7) {
- /* We can safely enable OES_copy_image on Gen 7, since we emulate
- * the ETC2 support using the shadow_miptree to store the
- * compressed data.
- */
- ctx->Extensions.OES_copy_image = true;
- }
-
- /* Gen < 6 still uses the blitter. It's somewhat annoying to add support
- * for blackhole there... Does anybody actually care anymore anyway?
- */
- if (devinfo->ver >= 6)
- ctx->Extensions.INTEL_blackhole_render = true;
-
- if (devinfo->ver >= 8) {
- ctx->Extensions.ARB_gpu_shader_int64 = true;
- /* requires ARB_gpu_shader_int64 */
- ctx->Extensions.ARB_shader_ballot = true;
- ctx->Extensions.ARB_ES3_2_compatibility = true;
-
- /* Currently only implemented in the scalar backend, so only enable for
- * Gfx8+. Eventually Gfx6+ could be supported.
- */
- ctx->Extensions.INTEL_shader_integer_functions2 = true;
- }
-
- if (devinfo->ver >= 9) {
- ctx->Extensions.ANDROID_extension_pack_es31a = true;
- ctx->Extensions.AMD_depth_clamp_separate = true;
- ctx->Extensions.ARB_post_depth_coverage = true;
- ctx->Extensions.ARB_shader_stencil_export = true;
- ctx->Extensions.EXT_shader_framebuffer_fetch = true;
- ctx->Extensions.INTEL_conservative_rasterization = true;
- ctx->Extensions.INTEL_shader_atomic_float_minmax = true;
- ctx->Extensions.KHR_blend_equation_advanced_coherent = true;
- ctx->Extensions.KHR_texture_compression_astc_ldr = true;
- ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
-
- /*
- * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221):
- * "A memory fence message issued by a thread causes further messages
- * issued by the thread to be blocked until all previous data port
- * messages have completed, or the results can be globally observed from
- * the point of view of other threads in the system."
- *
- * From the Haswell PRM Vol. 7 (Memory Fence, page 256):
- * "A memory fence message issued by a thread causes further messages
- * issued by the thread to be blocked until all previous messages issued
- * by the thread to that data port (data cache or render cache) have
- * been globally observed from the point of view of other threads in the
- * system."
- *
- * Summarized: For ARB_fragment_shader_interlock to work, we need to
- * ensure memory access ordering for all messages to the dataport from
- * all threads. Memory fence messages prior to SKL only provide memory
- * access ordering for messages from the same thread, so we can only
- * support the feature from Gfx9 onwards.
- *
- */
-
- ctx->Extensions.ARB_fragment_shader_interlock = true;
- }
-
- if (intel_device_info_is_9lp(devinfo))
- ctx->Extensions.KHR_texture_compression_astc_hdr = true;
-
- if (devinfo->ver >= 6)
- ctx->Extensions.INTEL_performance_query = true;
-
- if (ctx->API != API_OPENGL_COMPAT ||
- ctx->Const.AllowHigherCompatVersion)
- ctx->Extensions.ARB_base_instance = true;
- if (ctx->API != API_OPENGL_CORE)
- ctx->Extensions.ARB_color_buffer_float = true;
-
- ctx->Extensions.EXT_texture_compression_s3tc = true;
- ctx->Extensions.EXT_texture_compression_s3tc_srgb = true;
- ctx->Extensions.ANGLE_texture_compression_dxt = true;
-
- ctx->Extensions.EXT_demote_to_helper_invocation = true;
-
- ctx->Const.PrimitiveRestartFixedIndex = true;
-
- if (devinfo->ver >= 7) {
- ctx->Extensions.EXT_memory_object_fd = true;
- ctx->Extensions.EXT_memory_object = true;
- ctx->Extensions.EXT_semaphore = true;
- ctx->Extensions.EXT_semaphore_fd = true;
- }
-}
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/enums.h"
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/renderbuffer.h"
-#include "main/context.h"
-#include "main/teximage.h"
-#include "main/image.h"
-#include "main/condrender.h"
-#include "util/hash_table.h"
-#include "util/set.h"
-#include "util/u_memory.h"
-
-#include "swrast/swrast.h"
-#include "drivers/common/meta.h"
-
-#include "brw_batch.h"
-#include "brw_buffers.h"
-#include "brw_blit.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-#include "brw_image.h"
-#include "brw_screen.h"
-#include "brw_tex.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-
-#define FILE_DEBUG_FLAG DEBUG_FBO
-
-/** Called by gl_renderbuffer::Delete() */
-static void
-brw_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
- assert(irb);
-
- brw_miptree_release(&irb->mt);
- brw_miptree_release(&irb->singlesample_mt);
-
- _mesa_delete_renderbuffer(ctx, rb);
-}
-
-/**
- * \brief Downsample a winsys renderbuffer from mt to singlesample_mt.
- *
- * If the miptree needs no downsample, then skip.
- */
-void
-brw_renderbuffer_downsample(struct brw_context *brw,
- struct brw_renderbuffer *irb)
-{
- if (!irb->need_downsample)
- return;
- brw_miptree_updownsample(brw, irb->mt, irb->singlesample_mt);
- irb->need_downsample = false;
-}
-
-/**
- * \brief Upsample a winsys renderbuffer from singlesample_mt to mt.
- *
- * The upsample is done unconditionally.
- */
-void
-brw_renderbuffer_upsample(struct brw_context *brw,
- struct brw_renderbuffer *irb)
-{
- assert(!irb->need_downsample);
-
- brw_miptree_updownsample(brw, irb->singlesample_mt, irb->mt);
-}
-
-/**
- * \see dd_function_table::MapRenderbuffer
- */
-static void
-brw_map_renderbuffer(struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint x, GLuint y, GLuint w, GLuint h,
- GLbitfield mode,
- GLubyte **out_map,
- GLint *out_stride,
- bool flip_y)
-{
- struct brw_context *brw = brw_context(ctx);
- struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- struct brw_mipmap_tree *mt;
- void *map;
- ptrdiff_t stride;
-
- if (srb->Buffer) {
- /* this is a malloc'd renderbuffer (accum buffer), not an irb */
- GLint bpp = _mesa_get_format_bytes(rb->Format);
- GLint rowStride = srb->RowStride;
- *out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp;
- *out_stride = rowStride;
- return;
- }
-
- brw_prepare_render(brw);
-
- /* The MapRenderbuffer API should always return a single-sampled mapping.
- * The only case where we are asked to map multisampled RBs is glReadPixels()
- * (or swrast paths like glCopyTexImage()) from a window-system MSAA buffer,
- * and GL expects an automatic resolve to happen.
- *
- * If it's a color miptree, there is a ->singlesample_mt which wraps the
- * actual window system renderbuffer (which we may resolve to at any time),
- * while the miptree itself is our driver-private allocation. If it's a
- * depth or stencil miptree, we have a private MSAA buffer and no shared
- * singlesample buffer, and since we don't expect anybody to ever actually
- * resolve it, we just make a temporary singlesample buffer now when we
- * have to.
- */
- if (rb->NumSamples > 1) {
- if (!irb->singlesample_mt) {
- irb->singlesample_mt =
- brw_miptree_create_for_renderbuffer(brw, irb->mt->format,
- rb->Width, rb->Height,
- 1 /*num_samples*/);
- if (!irb->singlesample_mt)
- goto fail;
- irb->singlesample_mt_is_tmp = true;
- irb->need_downsample = true;
- }
-
- brw_renderbuffer_downsample(brw, irb);
- mt = irb->singlesample_mt;
-
- irb->need_map_upsample = mode & GL_MAP_WRITE_BIT;
- } else {
- mt = irb->mt;
- }
-
- /* For a window-system renderbuffer, we need to flip the mapping we receive
- * upside-down. So we ask for a vertically flipped rectangle, and then
- * return a pointer to the bottom of it with a negative stride.
- */
- if (flip_y) {
- y = rb->Height - y - h;
- }
-
- brw_miptree_map(brw, mt, irb->mt_level, irb->mt_layer,
- x, y, w, h, mode, &map, &stride);
-
- if (flip_y) {
- map += (h - 1) * stride;
- stride = -stride;
- }
-
- DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%"PRIdPTR"\n",
- __func__, rb->Name, _mesa_get_format_name(rb->Format),
- x, y, w, h, map, stride);
-
- *out_map = map;
- *out_stride = stride;
- return;
-
-fail:
- *out_map = NULL;
- *out_stride = 0;
-}
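The flip_y handling above hands the caller a pointer to the last mapped row together with a negated stride, so a top-down walk of the mapping visits the image bottom-up. The pointer math in isolation (a sketch, not the driver's API):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Given a mapping of 'rows' rows with a positive byte 'stride', produce a
     * vertically flipped view: a pointer to the last row and a negative stride. */
    static void
    flip_mapping(uint8_t *map, unsigned rows, ptrdiff_t stride,
                 uint8_t **out_map, ptrdiff_t *out_stride)
    {
       assert(rows > 0 && stride > 0);
       *out_map = map + (ptrdiff_t)(rows - 1) * stride;
       *out_stride = -stride;
    }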
-
-/**
- * \see dd_function_table::UnmapRenderbuffer
- */
-static void
-brw_unmap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
- struct brw_context *brw = brw_context(ctx);
- struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- struct brw_mipmap_tree *mt;
-
- DBG("%s: rb %d (%s)\n", __func__,
- rb->Name, _mesa_get_format_name(rb->Format));
-
- if (srb->Buffer) {
- /* this is a malloc'd renderbuffer (accum buffer) */
- /* nothing to do */
- return;
- }
-
- if (rb->NumSamples > 1) {
- mt = irb->singlesample_mt;
- } else {
- mt = irb->mt;
- }
-
- brw_miptree_unmap(brw, mt, irb->mt_level, irb->mt_layer);
-
- if (irb->need_map_upsample) {
- brw_renderbuffer_upsample(brw, irb);
- irb->need_map_upsample = false;
- }
-
- if (irb->singlesample_mt_is_tmp)
- brw_miptree_release(&irb->singlesample_mt);
-}
-
-
-/**
- * Round up the requested multisample count to the next supported sample size.
- */
-unsigned
-brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples)
-{
- const int *msaa_modes = brw_supported_msaa_modes(intel);
- int quantized_samples = 0;
-
- for (int i = 0; msaa_modes[i] != -1; ++i) {
- if (msaa_modes[i] >= num_samples)
- quantized_samples = msaa_modes[i];
- else
- break;
- }
-
- return quantized_samples;
-}
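brw_supported_msaa_modes() returns a descending, -1-terminated list, so the loop above keeps the last mode that still satisfies the request; the result is the smallest supported sample count >= num_samples, or 0 if the request exceeds the hardware maximum. A self-contained sketch with an assumed example mode list (the real list depends on the GPU generation):

    #include <stdio.h>

    static unsigned
    quantize_num_samples(const int *msaa_modes, unsigned num_samples)
    {
       unsigned quantized = 0;

       /* modes are sorted high to low and terminated with -1 */
       for (int i = 0; msaa_modes[i] != -1; i++) {
          if ((unsigned)msaa_modes[i] >= num_samples)
             quantized = msaa_modes[i];
          else
             break;
       }
       return quantized;
    }

    int
    main(void)
    {
       static const int modes[] = { 8, 4, 2, 0, -1 };   /* assumed example list */
       printf("%u\n", quantize_num_samples(modes, 3));  /* prints 4 */
       printf("%u\n", quantize_num_samples(modes, 16)); /* prints 0: unsupported */
       return 0;
    }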
-
-static mesa_format
-brw_renderbuffer_format(struct gl_context * ctx, GLenum internalFormat)
-{
- struct brw_context *brw = brw_context(ctx);
- ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- switch (internalFormat) {
- default:
- /* Use the same format-choice logic as for textures.
- * Renderbuffers aren't any different from textures for us,
- * except they're less useful because you can't texture with
- * them.
- */
- return ctx->Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D,
- internalFormat,
- GL_NONE, GL_NONE);
- break;
- case GL_STENCIL_INDEX:
- case GL_STENCIL_INDEX1_EXT:
- case GL_STENCIL_INDEX4_EXT:
- case GL_STENCIL_INDEX8_EXT:
- case GL_STENCIL_INDEX16_EXT:
- /* These aren't actual texture formats, so force them here. */
- if (brw->has_separate_stencil) {
- return MESA_FORMAT_S_UINT8;
- } else {
- assert(!devinfo->must_use_separate_stencil);
- return MESA_FORMAT_Z24_UNORM_S8_UINT;
- }
- }
-}
-
-static GLboolean
-brw_alloc_private_renderbuffer_storage(struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLenum internalFormat,
- GLuint width, GLuint height)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_screen *screen = brw->screen;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
- assert(rb->Format != MESA_FORMAT_NONE);
-
- rb->NumSamples = brw_quantize_num_samples(screen, rb->NumSamples);
- rb->NumStorageSamples = rb->NumSamples;
- rb->Width = width;
- rb->Height = height;
- rb->_BaseFormat = _mesa_get_format_base_format(rb->Format);
-
- brw_miptree_release(&irb->mt);
-
- DBG("%s: %s: %s (%dx%d)\n", __func__,
- _mesa_enum_to_string(internalFormat),
- _mesa_get_format_name(rb->Format), width, height);
-
- if (width == 0 || height == 0)
- return true;
-
- irb->mt = brw_miptree_create_for_renderbuffer(brw, rb->Format,
- width, height,
- MAX2(rb->NumSamples, 1));
- if (!irb->mt)
- return false;
-
- irb->layer_count = 1;
-
- return true;
-}
-
-/**
- * Called via glRenderbufferStorageEXT() to set the format and allocate
- * storage for a user-created renderbuffer.
- */
-static GLboolean
-brw_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
- GLenum internalFormat,
- GLuint width, GLuint height)
-{
- rb->Format = brw_renderbuffer_format(ctx, internalFormat);
- return brw_alloc_private_renderbuffer_storage(ctx, rb, internalFormat, width, height);
-}
-
-static mesa_format
-fallback_rgbx_to_rgba(struct brw_screen *screen, struct gl_renderbuffer *rb,
- mesa_format original_format)
-{
- mesa_format format = original_format;
-
- /* The base format and internal format must be derived from the user-visible
- * format (that is, the gl_config's format), even if we internally choose a
- * different format for the renderbuffer. Otherwise, rendering may use
- * incorrect channel write masks.
- */
- rb->_BaseFormat = _mesa_get_format_base_format(original_format);
- rb->InternalFormat = rb->_BaseFormat;
-
- if (!screen->mesa_format_supports_render[original_format]) {
- /* The glRenderbufferStorage paths in core Mesa detect if the driver
- * does not support the user-requested format, and then search for
- * a fallback format. The DRI code bypasses core Mesa, though, so we do
- * the fallbacks here.
- *
- * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android
- * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces.
- */
- format = _mesa_format_fallback_rgbx_to_rgba(original_format);
- assert(screen->mesa_format_supports_render[format]);
- }
- return format;
-}
-
-static void
-brw_image_target_renderbuffer_storage(struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- void *image_handle)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_renderbuffer *irb;
- __DRIscreen *dri_screen = brw->screen->driScrnPriv;
- __DRIimage *image;
-
- image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle,
- dri_screen->loaderPrivate);
- if (image == NULL)
- return;
-
- if (image->planar_format && image->planar_format->nplanes > 1) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glEGLImageTargetRenderbufferStorage(planar buffers are not "
- "supported as render targets.)");
- return;
- }
-
- rb->Format = fallback_rgbx_to_rgba(brw->screen, rb, image->format);
-
- mesa_format chosen_format = rb->Format == image->format ?
- image->format : rb->Format;
-
- /* __DRIimage is opaque to the core so it has to be checked here */
- if (!brw->mesa_format_supports_render[chosen_format]) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glEGLImageTargetRenderbufferStorage(unsupported image format)");
- return;
- }
-
- irb = brw_renderbuffer(rb);
- brw_miptree_release(&irb->mt);
-
- /* Disable creation of the miptree's aux buffers because the driver exposes
- * no EGL API to manage them. That is, there is no API for resolving the aux
- * buffer's content to the main buffer nor for invalidating the aux buffer's
- * content.
- */
- irb->mt = brw_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D,
- rb->Format, false);
- if (!irb->mt)
- return;
-
- rb->Width = image->width;
- rb->Height = image->height;
- rb->NeedsFinishRenderTexture = true;
- irb->layer_count = 1;
-}
-
-/**
- * Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a
- * window system framebuffer is resized.
- *
- * Any actual buffer reallocations for hardware renderbuffers (which would
- * have triggered _mesa_resize_framebuffer()) were done by
- * brw_process_dri2_buffer().
- */
-static GLboolean
-brw_alloc_window_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLenum internalFormat, GLuint width, GLuint height)
-{
- (void) ctx;
- assert(rb->Name == 0);
- rb->Width = width;
- rb->Height = height;
- rb->InternalFormat = internalFormat;
-
- return true;
-}
-
-/** Dummy function for gl_renderbuffer::AllocStorage() */
-static GLboolean
-brw_nop_alloc_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLenum internalFormat, GLuint width, GLuint height)
-{
- (void) rb;
- (void) internalFormat;
- (void) width;
- (void) height;
- _mesa_problem(ctx, "brw_nop_alloc_storage should never be called.");
- return false;
-}
-
-/**
- * Create a brw_renderbuffer for a __DRIdrawable. This function is
- * unrelated to GL renderbuffers (that is, those created by
- * glGenRenderbuffers).
- *
- * \param num_samples must be quantized.
- */
-struct brw_renderbuffer *
-brw_create_winsys_renderbuffer(struct brw_screen *screen,
- mesa_format format, unsigned num_samples)
-{
- struct brw_renderbuffer *irb = CALLOC_STRUCT(brw_renderbuffer);
- if (!irb)
- return NULL;
-
- struct gl_renderbuffer *rb = &irb->Base.Base;
- irb->layer_count = 1;
-
- _mesa_init_renderbuffer(rb, 0);
- rb->ClassID = INTEL_RB_CLASS;
- rb->NumSamples = num_samples;
- rb->NumStorageSamples = num_samples;
-
- rb->Format = fallback_rgbx_to_rgba(screen, rb, format);
-
- /* intel-specific methods */
- rb->Delete = brw_delete_renderbuffer;
- rb->AllocStorage = brw_alloc_window_storage;
-
- return irb;
-}
-
-/**
- * Private window-system buffers (as opposed to ones shared with the display
- * server created with brw_create_winsys_renderbuffer()) are most similar in their
- * handling to user-created renderbuffers, but they have a resize handler that
- * may be called at brw_update_renderbuffers() time.
- *
- * \param num_samples must be quantized.
- */
-struct brw_renderbuffer *
-brw_create_private_renderbuffer(struct brw_screen *screen,
- mesa_format format, unsigned num_samples)
-{
- struct brw_renderbuffer *irb;
-
- irb = brw_create_winsys_renderbuffer(screen, format, num_samples);
- irb->Base.Base.AllocStorage = brw_alloc_private_renderbuffer_storage;
-
- return irb;
-}
-
-/**
- * Create a new renderbuffer object.
- * Typically called via glBindRenderbufferEXT().
- */
-static struct gl_renderbuffer *
-brw_new_renderbuffer(struct gl_context *ctx, GLuint name)
-{
- struct brw_renderbuffer *irb;
- struct gl_renderbuffer *rb;
-
- irb = CALLOC_STRUCT(brw_renderbuffer);
- if (!irb) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
- return NULL;
- }
-
- rb = &irb->Base.Base;
-
- _mesa_init_renderbuffer(rb, name);
- rb->ClassID = INTEL_RB_CLASS;
-
- /* intel-specific methods */
- rb->Delete = brw_delete_renderbuffer;
- rb->AllocStorage = brw_alloc_renderbuffer_storage;
- /* span routines set in alloc_storage function */
-
- return rb;
-}
-
-static bool
-brw_renderbuffer_update_wrapper(struct brw_context *brw,
- struct brw_renderbuffer *irb,
- struct gl_texture_image *image,
- uint32_t layer,
- bool layered)
-{
- struct gl_renderbuffer *rb = &irb->Base.Base;
- struct brw_texture_image *intel_image = brw_texture_image(image);
- struct brw_mipmap_tree *mt = intel_image->mt;
- int level = image->Level;
-
- rb->AllocStorage = brw_nop_alloc_storage;
-
- /* adjust for texture view parameters */
- layer += image->TexObject->Attrib.MinLayer;
- level += image->TexObject->Attrib.MinLevel;
-
- brw_miptree_check_level_layer(mt, level, layer);
- irb->mt_level = level;
- irb->mt_layer = layer;
-
- if (!layered) {
- irb->layer_count = 1;
- } else if (mt->target != GL_TEXTURE_3D && image->TexObject->Attrib.NumLayers > 0) {
- irb->layer_count = image->TexObject->Attrib.NumLayers;
- } else {
- irb->layer_count = mt->surf.dim == ISL_SURF_DIM_3D ?
- minify(mt->surf.logical_level0_px.depth, level) :
- mt->surf.logical_level0_px.array_len;
- }
-
- brw_miptree_reference(&irb->mt, mt);
-
- brw_renderbuffer_set_draw_offset(irb);
-
- return true;
-}
-
-void
-brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb)
-{
- unsigned int dst_x, dst_y;
-
- /* compute offset of the particular 2D image within the texture region */
- brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer,
- &dst_x, &dst_y);
-
- irb->draw_x = dst_x;
- irb->draw_y = dst_y;
-}
-
-/**
- * Called by glFramebufferTexture[123]DEXT() (and other places) to
- * prepare for rendering into texture memory. This might be called
- * many times to choose different texture levels, cube faces, etc
- * before brw_finish_render_texture() is ever called.
- */
-static void
-brw_render_texture(struct gl_context * ctx,
- struct gl_framebuffer *fb,
- struct gl_renderbuffer_attachment *att)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_renderbuffer *rb = att->Renderbuffer;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- struct gl_texture_image *image = rb->TexImage;
- struct brw_texture_image *intel_image = brw_texture_image(image);
- struct brw_mipmap_tree *mt = intel_image->mt;
- int layer;
-
- (void) fb;
-
- if (att->CubeMapFace > 0) {
- assert(att->Zoffset == 0);
- layer = att->CubeMapFace;
- } else {
- layer = att->Zoffset;
- }
-
- if (!intel_image->mt) {
- /* Fall back on drawing to a texture that doesn't have a miptree
- * (has a border, width/height 0, etc.).
- */
- _swrast_render_texture(ctx, fb, att);
- return;
- }
-
- brw_miptree_check_level_layer(mt, att->TextureLevel, layer);
-
- if (!brw_renderbuffer_update_wrapper(brw, irb, image, layer, att->Layered)) {
- _swrast_render_texture(ctx, fb, att);
- return;
- }
-
- DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n",
- _mesa_get_format_name(image->TexFormat),
- att->Texture->Name, image->Width, image->Height, image->Depth,
- rb->RefCount);
-}
-
-
-#define fbo_incomplete(fb, error_id, ...) do { \
- static GLuint msg_id = 0; \
- if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \
- _mesa_gl_debugf(ctx, &msg_id, \
- MESA_DEBUG_SOURCE_API, \
- MESA_DEBUG_TYPE_OTHER, \
- MESA_DEBUG_SEVERITY_MEDIUM, \
- __VA_ARGS__); \
- } \
- DBG(__VA_ARGS__); \
- fb->_Status = error_id; \
- } while (0)
-
-/**
- * Do additional "completeness" testing of a framebuffer object.
- */
-static void
-brw_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_renderbuffer *depthRb =
- brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_renderbuffer *stencilRb =
- brw_get_renderbuffer(fb, BUFFER_STENCIL);
- struct brw_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
- unsigned i;
-
- DBG("%s() on fb %p (%s)\n", __func__,
- fb, (fb == ctx->DrawBuffer ? "drawbuffer" :
- (fb == ctx->ReadBuffer ? "readbuffer" : "other buffer")));
-
- if (depthRb)
- depth_mt = depthRb->mt;
- if (stencilRb) {
- stencil_mt = stencilRb->mt;
- if (stencil_mt->stencil_mt)
- stencil_mt = stencil_mt->stencil_mt;
- }
-
- if (depth_mt && stencil_mt) {
- if (devinfo->ver >= 6) {
- const unsigned d_width = depth_mt->surf.phys_level0_sa.width;
- const unsigned d_height = depth_mt->surf.phys_level0_sa.height;
- const unsigned d_depth = depth_mt->surf.dim == ISL_SURF_DIM_3D ?
- depth_mt->surf.phys_level0_sa.depth :
- depth_mt->surf.phys_level0_sa.array_len;
-
- const unsigned s_width = stencil_mt->surf.phys_level0_sa.width;
- const unsigned s_height = stencil_mt->surf.phys_level0_sa.height;
- const unsigned s_depth = stencil_mt->surf.dim == ISL_SURF_DIM_3D ?
- stencil_mt->surf.phys_level0_sa.depth :
- stencil_mt->surf.phys_level0_sa.array_len;
-
- /* For gen >= 6, we are using the lod/minimum-array-element fields
- * and supporting layered rendering. This means that we must restrict
- * the depth & stencil attachments to match in various more restrictive
- * ways (width, height, depth, LOD and layer).
- */
- if (d_width != s_width ||
- d_height != s_height ||
- d_depth != s_depth ||
- depthRb->mt_level != stencilRb->mt_level ||
- depthRb->mt_layer != stencilRb->mt_layer) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: depth and stencil must match in"
- "width, height, depth, LOD and layer\n");
- }
- }
- if (depth_mt == stencil_mt) {
- /* For true packed depth/stencil (not faked on prefers-separate-stencil
- * hardware) we need to be sure they're the same level/layer, since
- * we'll be emitting a single packet describing the packed setup.
- */
- if (depthRb->mt_level != stencilRb->mt_level ||
- depthRb->mt_layer != stencilRb->mt_layer) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: depth image level/layer %d/%d != "
- "stencil image %d/%d\n",
- depthRb->mt_level,
- depthRb->mt_layer,
- stencilRb->mt_level,
- stencilRb->mt_layer);
- }
- } else {
- if (!brw->has_separate_stencil) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: separate stencil unsupported\n");
- }
- if (stencil_mt->format != MESA_FORMAT_S_UINT8) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: separate stencil is %s "
- "instead of S8\n",
- _mesa_get_format_name(stencil_mt->format));
- }
- if (devinfo->ver < 7 && !brw_renderbuffer_has_hiz(depthRb)) {
- /* Before Gfx7, separate depth and stencil buffers can be used
- * only if HiZ is enabled. From the Sandybridge PRM, Volume 2,
- * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable:
- * [DevSNB]: This field must be set to the same value (enabled
- * or disabled) as Hierarchical Depth Buffer Enable.
- */
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: separate stencil without HiZ\n");
- }
- }
- }
-
- for (i = 0; i < ARRAY_SIZE(fb->Attachment); i++) {
- struct gl_renderbuffer *rb;
- struct brw_renderbuffer *irb;
-
- if (fb->Attachment[i].Type == GL_NONE)
- continue;
-
- /* A supported attachment will have a Renderbuffer set either
- * from being a Renderbuffer or being a texture that got the
- * brw_wrap_texture() treatment.
- */
- rb = fb->Attachment[i].Renderbuffer;
- if (rb == NULL) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: attachment without "
- "renderbuffer\n");
- continue;
- }
-
- if (fb->Attachment[i].Type == GL_TEXTURE) {
- if (rb->TexImage->Border) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: texture with border\n");
- continue;
- }
- }
-
- irb = brw_renderbuffer(rb);
- if (irb == NULL) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: software rendering renderbuffer\n");
- continue;
- }
-
- if (rb->Format == MESA_FORMAT_R_SRGB8) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT,
- "FBO incomplete: Format not color renderable: %s\n",
- _mesa_get_format_name(rb->Format));
- continue;
- }
-
- if (!brw_render_target_supported(brw, rb)) {
- fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
- "FBO incomplete: Unsupported HW "
- "texture/renderbuffer format attached: %s\n",
- _mesa_get_format_name(brw_rb_format(irb)));
- }
- }
-}
-
-/**
- * Try to do a glBlitFramebuffer using the hardware blitter.
- * We can do this when the blit is an unscaled, unmirrored, unscissored
- * 1:1 copy.
- *
- * \return new buffer mask indicating the buffers left to blit using the
- * normal path.
- */
-static GLbitfield
-brw_blit_framebuffer_with_blitter(struct gl_context *ctx,
- const struct gl_framebuffer *readFb,
- const struct gl_framebuffer *drawFb,
- GLint srcX0, GLint srcY0,
- GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0,
- GLint dstX1, GLint dstY1,
- GLbitfield mask)
-{
- struct brw_context *brw = brw_context(ctx);
-
- /* Sync up the state of window system buffers. We need to do this before
- * we go looking for the buffers.
- */
- brw_prepare_render(brw);
-
- if (mask & GL_COLOR_BUFFER_BIT) {
- unsigned i;
- struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
- struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb);
-
- if (!src_irb) {
- perf_debug("glBlitFramebuffer(): missing src renderbuffer. "
- "Falling back to software rendering.\n");
- return mask;
- }
-
- /* If the source and destination are the same size with no mirroring,
- * the rectangles are within the bounds of their buffers, and there is
- * no scissor, then we can probably use the blit engine.
- */
- if (!(srcX0 - srcX1 == dstX0 - dstX1 &&
- srcY0 - srcY1 == dstY0 - dstY1 &&
- srcX1 >= srcX0 &&
- srcY1 >= srcY0 &&
- srcX0 >= 0 && srcX1 <= readFb->Width &&
- srcY0 >= 0 && srcY1 <= readFb->Height &&
- dstX0 >= 0 && dstX1 <= drawFb->Width &&
- dstY0 >= 0 && dstY1 <= drawFb->Height &&
- !(ctx->Scissor.EnableFlags))) {
- perf_debug("glBlitFramebuffer(): non-1:1 blit. "
- "Falling back to software rendering.\n");
- return mask;
- }
-
- /* Blit to all active draw buffers. We don't do any pre-checking,
- * because we assume that copying to MRTs is rare, and failure midway
- * through copying is even more rare. Even if it was to occur, it's
- * safe to let meta start the copy over from scratch, because
- * glBlitFramebuffer completely overwrites the destination pixels, and
- * results are undefined if any destination pixels have a dependency on
- * source pixels.
- */
- for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *dst_rb = drawFb->_ColorDrawBuffers[i];
- struct brw_renderbuffer *dst_irb = brw_renderbuffer(dst_rb);
-
- if (!dst_irb) {
- perf_debug("glBlitFramebuffer(): missing dst renderbuffer. "
- "Falling back to software rendering.\n");
- return mask;
- }
-
- if (ctx->Color.sRGBEnabled &&
- _mesa_is_format_srgb(src_irb->mt->format) !=
- _mesa_is_format_srgb(dst_irb->mt->format)) {
- perf_debug("glBlitFramebuffer() with sRGB conversion cannot be "
- "handled by BLT path.\n");
- return mask;
- }
-
- if (!brw_miptree_blit(brw,
- src_irb->mt,
- src_irb->mt_level, src_irb->mt_layer,
- srcX0, srcY0, readFb->FlipY,
- dst_irb->mt,
- dst_irb->mt_level, dst_irb->mt_layer,
- dstX0, dstY0, drawFb->FlipY,
- dstX1 - dstX0, dstY1 - dstY0,
- COLOR_LOGICOP_COPY)) {
- perf_debug("glBlitFramebuffer(): unknown blit failure. "
- "Falling back to software rendering.\n");
- return mask;
- }
- }
-
- mask &= ~GL_COLOR_BUFFER_BIT;
- }
-
- return mask;
-}
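The eligibility test above boils down to: equal source and destination extents (so no scaling or mirroring), both rectangles fully inside their framebuffers, and no scissor. Expressed as a standalone predicate (illustrative names that mirror the checks rather than the driver API):

    #include <stdbool.h>

    struct blit_rect { int x0, y0, x1, y1; };

    /* True when src and dst describe same-sized, non-mirrored rectangles that
     * lie entirely inside their respective w x h surfaces. */
    static bool
    is_simple_1to1_blit(struct blit_rect src, struct blit_rect dst,
                        int src_w, int src_h, int dst_w, int dst_h,
                        bool scissor_enabled)
    {
       return src.x1 - src.x0 == dst.x1 - dst.x0 &&
              src.y1 - src.y0 == dst.y1 - dst.y0 &&
              src.x1 >= src.x0 && src.y1 >= src.y0 &&
              src.x0 >= 0 && src.x1 <= src_w &&
              src.y0 >= 0 && src.y1 <= src_h &&
              dst.x0 >= 0 && dst.x1 <= dst_w &&
              dst.y0 >= 0 && dst.y1 <= dst_h &&
              !scissor_enabled;
    }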
-
-static void
-brw_blit_framebuffer(struct gl_context *ctx,
- struct gl_framebuffer *readFb,
- struct gl_framebuffer *drawFb,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Page 679 of OpenGL 4.4 spec says:
- * "Added BlitFramebuffer to commands affected by conditional rendering in
- * section 10.10 (Bug 9562)."
- */
- if (!_mesa_check_conditional_render(ctx))
- return;
-
- if (devinfo->ver < 6) {
- /* On gfx4-5, try BLT first.
- *
- * Gfx4-5 have a single ring for both 3D and BLT operations, so there's
- * no inter-ring synchronization issues like on Gfx6+. It is apparently
- * faster than using the 3D pipeline. Original Gfx4 also has to rebase
- * and copy miptree slices in order to render to unaligned locations.
- */
- mask = brw_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- mask);
- if (mask == 0x0)
- return;
- }
-
- mask = brw_blorp_framebuffer(brw, readFb, drawFb,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- mask, filter);
- if (mask == 0x0)
- return;
-
- /* brw_blorp_framebuffer should always be successful for color blits. */
- assert(!(mask & GL_COLOR_BUFFER_BIT));
-
- mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- mask, filter);
- if (mask == 0x0)
- return;
-
- if (devinfo->ver >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) {
- assert(!"Invalid blit");
- }
-
- _swrast_BlitFramebuffer(ctx, readFb, drawFb,
- srcX0, srcY0, srcX1, srcY1,
- dstX0, dstY0, dstX1, dstY1,
- mask, filter);
-}
-
-/**
- * Does the renderbuffer have hiz enabled?
- */
-bool
-brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb)
-{
- return brw_miptree_level_has_hiz(irb->mt, irb->mt_level);
-}
-
-void
-brw_renderbuffer_move_to_temp(struct brw_context *brw,
- struct brw_renderbuffer *irb,
- bool invalidate)
-{
- struct gl_renderbuffer *rb = &irb->Base.Base;
- struct brw_texture_image *intel_image = brw_texture_image(rb->TexImage);
- struct brw_mipmap_tree *new_mt;
- int width, height, depth;
-
- brw_get_image_dims(rb->TexImage, &width, &height, &depth);
-
- assert(irb->align_wa_mt == NULL);
- new_mt = brw_miptree_create(brw, GL_TEXTURE_2D,
- intel_image->base.Base.TexFormat,
- 0, 0,
- width, height, 1,
- irb->mt->surf.samples,
- MIPTREE_CREATE_BUSY);
-
- if (!invalidate) {
- brw_miptree_copy_slice(brw, intel_image->mt,
- intel_image->base.Base.Level, irb->mt_layer,
- new_mt, 0, 0);
- }
-
- brw_miptree_reference(&irb->align_wa_mt, new_mt);
- brw_miptree_release(&new_mt);
-
- irb->draw_x = 0;
- irb->draw_y = 0;
-}
-
-void
-brw_cache_sets_clear(struct brw_context *brw)
-{
- hash_table_foreach(brw->render_cache, render_entry)
- _mesa_hash_table_remove(brw->render_cache, render_entry);
-
- set_foreach(brw->depth_cache, depth_entry)
- _mesa_set_remove(brw->depth_cache, depth_entry);
-}
-
-/**
- * Emits an appropriate flush for a BO if it has been rendered to within the
- * same batchbuffer as a read that's about to be emitted.
- *
- * The GPU has separate, incoherent caches for the render cache and the
- * sampler cache, along with other caches. Usually data in the different
- * caches don't interact (e.g. we don't render to our driver-generated
- * immediate constant data), but for render-to-texture in FBOs we definitely
- * do. When a batchbuffer is flushed, the kernel will ensure that everything
- * necessary is flushed before another use of that BO, but for reuse from
- * different caches within a batchbuffer, it's all our responsibility.
- */
-static void
-flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver >= 6) {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_CS_STALL);
-
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE);
- } else {
- brw_emit_mi_flush(brw);
- }
-
- brw_cache_sets_clear(brw);
-}
-
-void
-brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
-{
- if (_mesa_hash_table_search(brw->render_cache, bo) ||
- _mesa_set_search(brw->depth_cache, bo))
- flush_depth_and_render_caches(brw, bo);
-}
-
-static void *
-format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
-{
- return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
-}
-
-void
-brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
- enum isl_format format,
- enum isl_aux_usage aux_usage)
-{
- if (_mesa_set_search(brw->depth_cache, bo))
- flush_depth_and_render_caches(brw, bo);
-
- /* Check to see if this bo has been used by a previous rendering operation
- * but with a different format or aux usage. If it has, flush the render
- * cache so we ensure that it's only in there with one format or aux usage
- * at a time.
- *
- * Even though it's not obvious, this can easily happen in practice.
- * Suppose a client is blending on a surface with sRGB encode enabled on
- * gfx9. This implies that you get AUX_USAGE_CCS_D at best. If the client
- * then disables sRGB decode and continues blending we will flip on
- * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
- * perfectly valid since CCS_D is a subset of CCS_E). However, this means
- * that we have fragments in-flight which are rendering with UNORM+CCS_E
- * and other fragments in-flight with SRGB+CCS_D on the same surface at the
- * same time and the pixel scoreboard and color blender are trying to sort
- * it all out. This ends badly (i.e. GPU hangs).
- *
- * To date, we have never observed GPU hangs or even corruption to be
- * associated with switching the format, only the aux usage. However,
- * there are comments in various docs which indicate that the render cache
- * isn't 100% resilient to format changes. We may as well be conservative
- * and flush on format changes too. We can always relax this later if we
- * find it to be a performance problem.
- */
- struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
- if (entry && entry->data != format_aux_tuple(format, aux_usage))
- flush_depth_and_render_caches(brw, bo);
-}
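format_aux_tuple() packs the ISL format and aux usage into one pointer-sized hash-table value, so a format or aux-usage change on a BO is detected with a single comparison against the cached key. A sketch of the same packing with plain integers standing in for the ISL enums (the aux value is assumed to fit the 8-bit field used above):

    #include <assert.h>
    #include <stdint.h>

    /* Pack a (format, aux_usage) pair into one pointer-sized key; aux_usage
     * occupies the low 8 bits, matching the shift used by format_aux_tuple(). */
    static void *
    pack_format_aux(uint32_t format, uint32_t aux_usage)
    {
       assert(aux_usage < 0x100);
       return (void *)(uintptr_t)((format << 8) | aux_usage);
    }

    static void
    unpack_format_aux(void *key, uint32_t *format, uint32_t *aux_usage)
    {
       const uintptr_t v = (uintptr_t)key;
       *aux_usage = v & 0xff;
       *format = (uint32_t)(v >> 8);
    }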
-
-void
-brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
- enum isl_format format,
- enum isl_aux_usage aux_usage)
-{
-#ifndef NDEBUG
- struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
- if (entry) {
- /* Otherwise, someone didn't do a flush_for_render and that would be
- * very bad indeed.
- */
- assert(entry->data == format_aux_tuple(format, aux_usage));
- }
-#endif
-
- _mesa_hash_table_insert(brw->render_cache, bo,
- format_aux_tuple(format, aux_usage));
-}
-
-void
-brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo)
-{
- if (_mesa_hash_table_search(brw->render_cache, bo))
- flush_depth_and_render_caches(brw, bo);
-}
-
-void
-brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
-{
- _mesa_set_add(brw->depth_cache, bo);
-}
-
-/**
- * Do one-time context initializations related to GL_EXT_framebuffer_object.
- * Hook in device driver functions.
- */
-void
-brw_fbo_init(struct brw_context *brw)
-{
- struct dd_function_table *dd = &brw->ctx.Driver;
- dd->NewRenderbuffer = brw_new_renderbuffer;
- dd->MapRenderbuffer = brw_map_renderbuffer;
- dd->UnmapRenderbuffer = brw_unmap_renderbuffer;
- dd->RenderTexture = brw_render_texture;
- dd->ValidateFramebuffer = brw_validate_framebuffer;
- dd->BlitFramebuffer = brw_blit_framebuffer;
- dd->EGLImageTargetRenderbufferStorage =
- brw_image_target_renderbuffer_storage;
-
- brw->render_cache = _mesa_hash_table_create(brw->mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- brw->depth_cache = _mesa_set_create(brw->mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- util_dynarray_init(&brw->batch.exec_fences, NULL);
-}
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_FBO_H
-#define BRW_FBO_H
-
-#include <stdbool.h>
-#include <assert.h>
-#include "main/formats.h"
-#include "main/macros.h"
-#include "brw_context.h"
-#include "brw_mipmap_tree.h"
-#include "brw_screen.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_mipmap_tree;
-
-/**
- * Intel renderbuffer, derived from gl_renderbuffer.
- */
-struct brw_renderbuffer
-{
- struct swrast_renderbuffer Base;
- /**
- * The real renderbuffer storage.
- *
- * This is multisampled if NumSamples is > 1.
- */
- struct brw_mipmap_tree *mt;
-
- /**
- * Downsampled contents for window-system MSAA renderbuffers.
- *
- * For window system MSAA color buffers, the singlesample_mt is shared with
- * other processes in DRI2 (and in DRI3, it's the image buffer managed by
- * glx_dri3.c), while mt is private to our process. To do a swapbuffers,
- * we have to downsample out of mt into singlesample_mt. For depth and
- * stencil buffers, the singlesample_mt is also private, and since we don't
- * expect to need to do resolves (except if someone does a glReadPixels()
- * or glCopyTexImage()), we just temporarily allocate singlesample_mt when
- * asked to map the renderbuffer.
- */
- struct brw_mipmap_tree *singlesample_mt;
-
- /* Gen < 6 doesn't have a layer specifier for render targets or depth. The
- * driver needs to manually offset surfaces to the correct level/layer.
- * There are, however, alignment restrictions to respect as well, and in
- * some cases the only option is to use a temporary single-slice surface
- * which the driver copies to the full miptree after rendering.
- *
- * See brw_renderbuffer_move_to_temp().
- */
- struct brw_mipmap_tree *align_wa_mt;
-
- /**
- * \name Miptree view
- * \{
- *
- * Multiple renderbuffers may simultaneously wrap a single texture and each
- * provide a different view into that texture. The fields below indicate
- * which miptree slice is wrapped by this renderbuffer. The fields' values
- * are consistent with the 'level' and 'layer' parameters of
- * glFramebufferTextureLayer().
- *
- * For renderbuffers not created with glFramebufferTexture*(), mt_level and
- * mt_layer are 0.
- */
- unsigned int mt_level;
- unsigned int mt_layer;
-
- /* The number of attached logical layers. */
- unsigned int layer_count;
- /** \} */
-
- GLuint draw_x, draw_y; /**< Offset of drawing within the region */
-
- /**
- * Set to true at every draw call, to indicate if a window-system
- * renderbuffer needs to be downsampled before using singlesample_mt.
- */
- bool need_downsample;
-
- /**
- * Set to true when doing a brw_renderbuffer_map()/unmap() that requires
- * an upsample at the end.
- */
- bool need_map_upsample;
-
- /**
- * Set to true if singlesample_mt is temporary storage that persists only
- * for the duration of a mapping.
- */
- bool singlesample_mt_is_tmp;
-
- /**
- * Set to true when the application specifically asked for an sRGB visual.
- */
- bool need_srgb;
-};
-
-
-/**
- * gl_renderbuffer is a base class which we subclass. The Class field
- * is used for simple run-time type checking.
- */
-#define INTEL_RB_CLASS 0x12345678
-
-
-/**
- * Return a gl_renderbuffer pointer cast to brw_renderbuffer.
- * NULL is returned if the rb isn't really a brw_renderbuffer;
- * this is determined by checking the ClassID.
- */
-static inline struct brw_renderbuffer *
-brw_renderbuffer(struct gl_renderbuffer *rb)
-{
- struct brw_renderbuffer *irb = (struct brw_renderbuffer *) rb;
- if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS)
- return irb;
- else
- return NULL;
-}
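For reference, a brief usage sketch of the ClassID-based downcast above. The draw-buffer lookup is only an illustrative source of a gl_renderbuffer pointer, and the NULL check is what lets the driver reject renderbuffers it did not create:

    struct brw_renderbuffer *irb =
       brw_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
    if (!irb)
       return;   /* not an i965 renderbuffer; take a fallback path */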
-
-static inline struct brw_mipmap_tree *
-brw_renderbuffer_get_mt(struct brw_renderbuffer *irb)
-{
- if (!irb)
- return NULL;
-
- return (irb->align_wa_mt) ? irb->align_wa_mt : irb->mt;
-}
-
-/**
- * \brief Return the framebuffer attachment specified by attIndex.
- *
- * If the framebuffer lacks the specified attachment, then return null.
- *
- * If the attached renderbuffer is a wrapper, then return the wrapped
- * renderbuffer.
- */
-static inline struct brw_renderbuffer *
-brw_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
-{
- struct gl_renderbuffer *rb;
-
- assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));
-
- rb = fb->Attachment[attIndex].Renderbuffer;
- if (!rb)
- return NULL;
-
- return brw_renderbuffer(rb);
-}
-
-
-static inline mesa_format
-brw_rb_format(const struct brw_renderbuffer *rb)
-{
- return rb->Base.Base.Format;
-}
-
-extern struct brw_renderbuffer *
-brw_create_winsys_renderbuffer(struct brw_screen *screen,
- mesa_format format, unsigned num_samples);
-
-struct brw_renderbuffer *
-brw_create_private_renderbuffer(struct brw_screen *screen,
- mesa_format format, unsigned num_samples);
-
-struct gl_renderbuffer*
-brw_create_wrapped_renderbuffer(struct gl_context *ctx,
- int width, int height,
- mesa_format format);
-
-extern void
-brw_fbo_init(struct brw_context *brw);
-
-void
-brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb);
-
-static inline uint32_t
-brw_renderbuffer_get_tile_offsets(struct brw_renderbuffer *irb,
- uint32_t *tile_x,
- uint32_t *tile_y)
-{
- if (irb->align_wa_mt) {
- *tile_x = 0;
- *tile_y = 0;
- return 0;
- }
-
- return brw_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
- tile_x, tile_y);
-}
-
-bool
-brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb);
-
-
-void brw_renderbuffer_move_to_temp(struct brw_context *brw,
- struct brw_renderbuffer *irb,
- bool invalidate);
-
-void
-brw_renderbuffer_downsample(struct brw_context *brw,
- struct brw_renderbuffer *irb);
-
-void
-brw_renderbuffer_upsample(struct brw_context *brw,
- struct brw_renderbuffer *irb);
-
-void brw_cache_sets_clear(struct brw_context *brw);
-void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
-void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
- enum isl_format format,
- enum isl_aux_usage aux_usage);
-void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
-void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
- enum isl_format format,
- enum isl_aux_usage aux_usage);
-void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);
-
-unsigned
-brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BRW_FBO_H */
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-#include "main/macros.h"
-#include "main/enums.h"
-#include "main/transformfeedback.h"
-
-#include "brw_batch.h"
-
-#include "brw_defines.h"
-#include "brw_context.h"
-#include "brw_util.h"
-#include "brw_state.h"
-#include "brw_ff_gs.h"
-#include "util/ralloc.h"
-
-static void
-compile_ff_gs_prog(struct brw_context *brw,
- struct brw_ff_gs_prog_key *key)
-{
- const GLuint *program;
- void *mem_ctx;
- GLuint program_size;
-
- mem_ctx = ralloc_context(NULL);
-
- struct brw_ff_gs_prog_data prog_data;
- program = brw_compile_ff_gs_prog(brw->screen->compiler, mem_ctx, key,
- &prog_data,
- &brw_vue_prog_data(brw->vs.base.prog_data)->vue_map,
- &program_size);
-
- brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG,
- key, sizeof(*key),
- program, program_size,
- &prog_data, sizeof(prog_data),
- &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);
- ralloc_free(mem_ctx);
-}
-
-static bool
-brw_ff_gs_state_dirty(const struct brw_context *brw)
-{
- return brw_state_dirty(brw,
- _NEW_LIGHT,
- BRW_NEW_PRIMITIVE |
- BRW_NEW_TRANSFORM_FEEDBACK |
- BRW_NEW_VS_PROG_DATA);
-}
-
-static void
-brw_ff_gs_populate_key(struct brw_context *brw,
- struct brw_ff_gs_prog_key *key)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- static const unsigned swizzle_for_offset[4] = {
- BRW_SWIZZLE4(0, 1, 2, 3),
- BRW_SWIZZLE4(1, 2, 3, 3),
- BRW_SWIZZLE4(2, 3, 3, 3),
- BRW_SWIZZLE4(3, 3, 3, 3)
- };
-
- struct gl_context *ctx = &brw->ctx;
-
- assert(devinfo->ver < 7);
-
- memset(key, 0, sizeof(*key));
-
- /* BRW_NEW_VS_PROG_DATA (part of VUE map) */
- key->attrs = brw_vue_prog_data(brw->vs.base.prog_data)->vue_map.slots_valid;
-
- /* BRW_NEW_PRIMITIVE */
- key->primitive = brw->primitive;
-
- /* _NEW_LIGHT */
- key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
- if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
- /* Provide consistent primitive order with brw_set_prim's
- * optimization of single quads to trifans.
- */
- key->pv_first = true;
- }
-
- if (devinfo->ver == 6) {
- /* On Gfx6, GS is used for transform feedback. */
- /* BRW_NEW_TRANSFORM_FEEDBACK */
- if (_mesa_is_xfb_active_and_unpaused(ctx)) {
- const struct gl_program *prog =
- ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
- const struct gl_transform_feedback_info *linked_xfb_info =
- prog->sh.LinkedTransformFeedback;
- int i;
-
- /* Make sure that the VUE slots won't overflow the unsigned chars in
- * key->transform_feedback_bindings[].
- */
- STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
-
- /* Make sure that we don't need more binding table entries than we've
- * set aside for use in transform feedback. (We shouldn't, since we
- * set aside enough binding table entries to have one per component).
- */
- assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
-
- key->need_gs_prog = true;
- key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
- for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
- key->transform_feedback_bindings[i] =
- linked_xfb_info->Outputs[i].OutputRegister;
- key->transform_feedback_swizzles[i] =
- swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
- }
- }
- } else {
- /* Pre-gfx6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
- * into simpler primitives.
- */
- key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
- brw->primitive == _3DPRIM_QUADSTRIP ||
- brw->primitive == _3DPRIM_LINELOOP);
- }
-}
-
-/* Upload the fixed-function GS program for the current state, compiling a
- * new one if necessary.
- */
-void
-brw_upload_ff_gs_prog(struct brw_context *brw)
-{
- struct brw_ff_gs_prog_key key;
-
- if (!brw_ff_gs_state_dirty(brw))
- return;
-
- /* Populate the key:
- */
- brw_ff_gs_populate_key(brw, &key);
-
- if (brw->ff_gs.prog_active != key.need_gs_prog) {
- brw->ctx.NewDriverState |= BRW_NEW_FF_GS_PROG_DATA;
- brw->ff_gs.prog_active = key.need_gs_prog;
- }
-
- if (brw->ff_gs.prog_active) {
- if (!brw_search_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &key,
- sizeof(key), &brw->ff_gs.prog_offset,
- &brw->ff_gs.prog_data, true)) {
- compile_ff_gs_prog(brw, &key);
- }
- }
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRW_GS_H
-#define BRW_GS_H
-
-#include "brw_context.h"
-#include "compiler/brw_eu.h"
-
-void
-brw_upload_ff_gs_prog(struct brw_context *brw);
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "main/context.h"
-#include "main/formatquery.h"
-#include "main/glformats.h"
-
-static size_t
-brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, int samples[16])
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- (void) target;
- (void) internalFormat;
-
- switch (devinfo->ver) {
- case 11:
- case 10:
- case 9:
- samples[0] = 16;
- samples[1] = 8;
- samples[2] = 4;
- samples[3] = 2;
- return 4;
-
- case 8:
- samples[0] = 8;
- samples[1] = 4;
- samples[2] = 2;
- return 3;
-
- case 7:
- if (internalFormat == GL_RGBA32F && _mesa_is_gles(ctx)) {
- /* For GLES, we are allowed to return a smaller number of samples for
- * GL_RGBA32F. See OpenGLES 3.2 spec, section 20.3.1 Internal Format
- * Query Parameters, under SAMPLES:
- *
- * "A value less than or equal to the value of MAX_SAMPLES, if
- * internalformat is RGBA16F, R32F, RG32F, or RGBA32F."
- *
- * In brw_render_target_supported, we prevent formats with a size
- * greater than 8 bytes from using 8x MSAA on gfx7.
- */
- samples[0] = 4;
- return 1;
- } else {
- samples[0] = 8;
- samples[1] = 4;
- return 2;
- }
-
- case 6:
- samples[0] = 4;
- return 1;
-
- default:
- assert(devinfo->ver < 6);
- samples[0] = 1;
- return 1;
- }
-}
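As a point of reference, this is roughly how an application observes the table above through the standard GL internal-format query. The target and format are arbitrary examples, and the {16, 8, 4, 2} result assumes a Gfx9 part handled by the first case of the switch:

    GLint count = 0;
    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8, GL_NUM_SAMPLE_COUNTS, 1, &count);

    GLint samples[16];
    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8, GL_SAMPLES, count, samples);
    /* With the hook above, a Gfx9 part reports count == 4 and {16, 8, 4, 2}. */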
-
-void
-brw_query_internal_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, GLenum pname, GLint *params)
-{
- /* The Mesa layer gives us a temporary params buffer that is guaranteed
- * to be non-NULL and to have at least 16 elements.
- */
- assert(params != NULL);
-
- switch (pname) {
- case GL_SAMPLES:
- brw_query_samples_for_format(ctx, target, internalFormat, params);
- break;
-
- case GL_NUM_SAMPLE_COUNTS: {
- size_t num_samples;
- GLint dummy_buffer[16];
-
- num_samples = brw_query_samples_for_format(ctx, target, internalFormat,
- dummy_buffer);
- params[0] = (GLint) num_samples;
- break;
- }
-
- default:
- /* By default, we call the driver hook's fallback function from the frontend,
- * which has a generic implementation for all pnames.
- */
- _mesa_query_internal_format_default(ctx, target, internalFormat, pname,
- params);
- break;
- }
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mipmap.h"
-#include "main/teximage.h"
-#include "brw_blorp.h"
-#include "brw_context.h"
-#include "brw_tex.h"
-#include "drivers/common/meta.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLORP
-
-
-/**
- * The GenerateMipmap() driver hook.
- */
-void
-brw_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *tex_obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_texture_object *intel_obj = brw_texture_object(tex_obj);
- const unsigned base_level = tex_obj->Attrib.BaseLevel;
- unsigned last_level, first_layer, last_layer;
-
- /* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
- if (devinfo->ver <= 5 &&
- (tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
- tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {
- _mesa_meta_GenerateMipmap(ctx, target, tex_obj);
- return;
- }
-
- /* find expected last mipmap level to generate */
- last_level = _mesa_compute_num_levels(ctx, tex_obj, target) - 1;
-
- if (last_level == 0)
- return;
-
- /* The texture isn't in a "complete" state yet so set the expected
- * last_level here; we're not going through normal texture validation.
- */
- intel_obj->_MaxLevel = last_level;
-
- if (!tex_obj->Immutable) {
- _mesa_prepare_mipmap_levels(ctx, tex_obj, base_level, last_level);
-
- /* At this point, memory for all the texture levels has been
- * allocated. However, the base level image may be in one resource
- * while the subsequent/smaller levels may be in another resource.
- * Finalizing the texture will copy the base images from the former
- * resource to the latter.
- *
- * After this, we'll have all mipmap levels in one resource.
- */
- brw_finalize_mipmap_tree(brw, tex_obj);
- }
-
- struct brw_mipmap_tree *mt = intel_obj->mt;
- if (!mt) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "mipmap generation");
- return;
- }
-
- const mesa_format format = intel_obj->_Format;
-
- /* Fall back to the CPU for non-renderable cases.
- *
- * TODO: 3D textures require blending data from multiple slices,
- * which means we need custom shaders. For now, fall back.
- */
- if (!brw->mesa_format_supports_render[format] || target == GL_TEXTURE_3D) {
- _mesa_generate_mipmap(ctx, target, tex_obj);
- return;
- }
-
- const struct isl_extent4d *base_size = &mt->surf.logical_level0_px;
-
- if (mt->target == GL_TEXTURE_CUBE_MAP) {
- first_layer = _mesa_tex_target_to_face(target);
- last_layer = first_layer;
- } else {
- first_layer = 0;
- last_layer = base_size->array_len - 1;
- }
-
- /* The GL_EXT_texture_sRGB_decode extension's issues section says:
- *
- * "10) How is mipmap generation of sRGB textures affected by the
- * TEXTURE_SRGB_DECODE_EXT parameter?
- *
- * RESOLVED: When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT
- * for an sRGB texture, mipmap generation should decode sRGB texels
- * to a linear RGB color space, perform downsampling, then encode
- * back to an sRGB color space. (Issue 24 in the EXT_texture_sRGB
- * specification provides a rationale for why.) When the parameter
- * is SKIP_DECODE_EXT instead, mipmap generation skips the encode
- * and decode steps during mipmap generation. By skipping the
- * encode and decode steps, sRGB mipmap generation should match
- * the mipmap generation for a non-sRGB texture."
- */
- bool do_srgb = tex_obj->Sampler.Attrib.sRGBDecode == GL_DECODE_EXT;
-
- for (unsigned dst_level = base_level + 1;
- dst_level <= last_level;
- dst_level++) {
-
- const unsigned src_level = dst_level - 1;
-
- for (unsigned layer = first_layer; layer <= last_layer; layer++) {
- brw_blorp_blit_miptrees(brw, mt, src_level, layer, format,
- SWIZZLE_XYZW, mt, dst_level, layer, format,
- 0, 0,
- minify(base_size->width, src_level),
- minify(base_size->height, src_level),
- 0, 0,
- minify(base_size->width, dst_level),
- minify(base_size->height, dst_level),
- GL_LINEAR, false, false,
- do_srgb, do_srgb);
- }
- }
-}
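A standalone sketch of the minify() arithmetic that drives the per-level blit sizes in the loop above, assuming Mesa's usual definition of minify(v, l) as MAX2(1, v >> l):

    #include <stdio.h>

    static unsigned minify(unsigned value, unsigned level)
    {
       unsigned v = value >> level;
       return v ? v : 1;   /* mirrors MAX2(1, value >> level) */
    }

    int main(void)
    {
       /* A 10x6 base level yields 10x6, 5x3, 2x1, 1x1 for levels 0..3. */
       for (unsigned level = 0; level <= 3; level++)
          printf("level %u: %u x %u\n", level, minify(10, level), minify(6, level));
       return 0;
    }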
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_vec4_gs.c
- *
- * State atom for client-programmable geometry shaders, and support code.
- */
-
-#include "brw_gs.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_ff_gs.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/ir_uniform.h"
-
-static void
-assign_gs_binding_table_offsets(const struct intel_device_info *devinfo,
- const struct gl_program *prog,
- struct brw_gs_prog_data *prog_data)
-{
- /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
- * feedback surfaces.
- */
- uint32_t reserved = devinfo->ver == 6 ? BRW_MAX_SOL_BINDINGS : 0;
-
- brw_assign_common_binding_table_offsets(devinfo, prog,
- &prog_data->base.base, reserved);
-}
-
-static void
-brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info,
- struct brw_gs_prog_data *gs_prog_data)
-{
- static const unsigned swizzle_for_offset[4] = {
- BRW_SWIZZLE4(0, 1, 2, 3),
- BRW_SWIZZLE4(1, 2, 3, 3),
- BRW_SWIZZLE4(2, 3, 3, 3),
- BRW_SWIZZLE4(3, 3, 3, 3)
- };
-
- int i;
-
- /* Make sure that the VUE slots won't overflow the unsigned chars in
- * prog_data->transform_feedback_bindings[].
- */
- STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
-
- /* Make sure that we don't need more binding table entries than we've
- * set aside for use in transform feedback. (We shouldn't, since we
- * set aside enough binding table entries to have one per component).
- */
- assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
-
- gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
- for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
- gs_prog_data->transform_feedback_bindings[i] =
- linked_xfb_info->Outputs[i].OutputRegister;
- gs_prog_data->transform_feedback_swizzles[i] =
- swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
- }
-}
-
-static bool
-brw_codegen_gs_prog(struct brw_context *brw,
- struct brw_program *gp,
- struct brw_gs_prog_key *key)
-{
- struct brw_compiler *compiler = brw->screen->compiler;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_stage_state *stage_state = &brw->gs.base;
- struct brw_gs_prog_data prog_data;
- bool start_busy = false;
- double start_time = 0;
-
- memset(&prog_data, 0, sizeof(prog_data));
-
- void *mem_ctx = ralloc_context(NULL);
-
- nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir);
-
- assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
-
- brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program,
- &prog_data.base.base,
- compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
- if (brw->can_push_ubos) {
- brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
- prog_data.base.base.ubo_ranges);
- }
-
- uint64_t outputs_written = nir->info.outputs_written;
-
- brw_compute_vue_map(devinfo,
- &prog_data.base.vue_map, outputs_written,
- gp->program.info.separate_shader, 1);
-
- if (devinfo->ver == 6)
- brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback,
- &prog_data);
-
- int st_index = -1;
- if (INTEL_DEBUG(DEBUG_SHADER_TIME))
- st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true);
-
- if (unlikely(brw->perf_debug)) {
- start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
- start_time = get_time();
- }
-
- char *error_str;
- const unsigned *program =
- brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key,
- &prog_data, nir, st_index,
- NULL, &error_str);
- if (program == NULL) {
- ralloc_strcat(&gp->program.sh.data->InfoLog, error_str);
- _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- if (unlikely(brw->perf_debug)) {
- if (gp->compiled_once) {
- brw_debug_recompile(brw, MESA_SHADER_GEOMETRY, gp->program.Id,
- &key->base);
- }
- if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
- perf_debug("GS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- gp->compiled_once = true;
- }
-
- /* Scratch space is used for register spilling */
- brw_alloc_stage_scratch(brw, stage_state,
- prog_data.base.base.total_scratch);
-
- /* The param and pull_param arrays will be freed by the shader cache. */
- ralloc_steal(NULL, prog_data.base.base.param);
- ralloc_steal(NULL, prog_data.base.base.pull_param);
- brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
- key, sizeof(*key),
- program, prog_data.base.base.program_size,
- &prog_data, sizeof(prog_data),
- &stage_state->prog_offset, &brw->gs.base.prog_data);
- ralloc_free(mem_ctx);
-
- return true;
-}
-
-static bool
-brw_gs_state_dirty(const struct brw_context *brw)
-{
- return brw_state_dirty(brw,
- _NEW_TEXTURE,
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_TRANSFORM_FEEDBACK);
-}
-
-void
-brw_gs_populate_key(struct brw_context *brw,
- struct brw_gs_prog_key *key)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_program *gp =
- (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
-
- memset(key, 0, sizeof(*key));
-
- brw_populate_base_prog_key(ctx, gp, &key->base);
-}
-
-void
-brw_upload_gs_prog(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->gs.base;
- struct brw_gs_prog_key key;
- /* BRW_NEW_GEOMETRY_PROGRAM */
- struct brw_program *gp =
- (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
-
- if (!brw_gs_state_dirty(brw))
- return;
-
- brw_gs_populate_key(brw, &key);
-
- if (brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key),
- &stage_state->prog_offset, &brw->gs.base.prog_data,
- true))
- return;
-
- if (brw_disk_cache_upload_program(brw, MESA_SHADER_GEOMETRY))
- return;
-
- gp = (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
- gp->id = key.base.program_string_id;
-
- ASSERTED bool success = brw_codegen_gs_prog(brw, gp, &key);
- assert(success);
-}
-
-void
-brw_gs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_gs_prog_key *key,
- struct gl_program *prog)
-{
- const struct intel_device_info *devinfo = compiler->devinfo;
-
- memset(key, 0, sizeof(*key));
-
- brw_populate_default_base_prog_key(devinfo, brw_program(prog),
- &key->base);
-}
-
-bool
-brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_gs_prog_key key;
- uint32_t old_prog_offset = brw->gs.base.prog_offset;
- struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data;
- bool success;
-
- struct brw_program *bgp = brw_program(prog);
-
- brw_gs_populate_default_key(brw->screen->compiler, &key, prog);
-
- success = brw_codegen_gs_prog(brw, bgp, &key);
-
- brw->gs.base.prog_offset = old_prog_offset;
- brw->gs.base.prog_data = old_prog_data;
-
- return success;
-}
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_GS_H
-#define BRW_VEC4_GS_H
-
-#include <stdbool.h>
-
-#include "brw_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct gl_shader_program;
-
-void
-brw_upload_gs_prog(struct brw_context *brw);
-
-void
-brw_gs_populate_key(struct brw_context *brw,
- struct brw_gs_prog_key *key);
-void
-brw_gs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_gs_prog_key *key,
- struct gl_program *prog);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* BRW_VEC4_GS_H */
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-
-
-/* Creates a new GS constant buffer reflecting the current GS program's
- * constants, if needed by the GS program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_gs_pull_constants(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->gs.base;
-
- /* BRW_NEW_GEOMETRY_PROGRAM */
- struct brw_program *gp =
- (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
-
- if (!gp)
- return;
-
- /* BRW_NEW_GS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
- /* _NEW_PROGRAM_CONSTANTS */
- brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program,
- stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_gs_pull_constants = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA,
- },
- .emit = brw_upload_gs_pull_constants,
-};
-
-static void
-brw_upload_gs_ubo_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_PROGRAM */
- struct gl_program *prog =
- ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
-
- /* BRW_NEW_GS_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
- brw_upload_ubo_surfaces(brw, prog, &brw->gs.base, prog_data);
-}
-
-const struct brw_tracked_state brw_gs_ubo_surfaces = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_UNIFORM_BUFFER,
- },
- .emit = brw_upload_gs_ubo_surfaces,
-};
-
-static void
-brw_upload_gs_image_surfaces(struct brw_context *brw)
-{
- /* BRW_NEW_GEOMETRY_PROGRAM */
- const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
-
- if (gp) {
- /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
- brw_upload_image_surfaces(brw, gp, &brw->gs.base,
- brw->gs.base.prog_data);
- }
-}
-
-const struct brw_tracked_state brw_gs_image_surfaces = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_IMAGE_UNITS,
- },
- .emit = brw_upload_gs_image_surfaces,
-};
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_IMAGE_H
-#define BRW_IMAGE_H
-
-/** @file brw_image.h
- *
- * Structure definitions and prototypes for __DRIimage, the driver-private
- * structure backing EGLImage or a drawable in DRI3.
- *
- * The __DRIimage is passed around the loader code (src/glx and src/egl), but
- * it's opaque to that code and may only be accessed by loader extensions
- * (mostly located in brw_screen.c).
- */
-
-#include <stdbool.h>
-#include <xf86drm.h>
-
-#include "main/mtypes.h"
-#include "brw_bufmgr.h"
-#include <GL/internal/dri_interface.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * Used with images created with image_from_names
- * to help support planar images.
- */
-struct brw_image_format {
- int fourcc;
- int components;
- int nplanes;
- struct {
- int buffer_index;
- int width_shift;
- int height_shift;
- uint32_t dri_format;
- int cpp;
- } planes[3];
- float scaling_factor;
-};
-
-struct __DRIimageRec {
- struct brw_screen *screen;
- struct brw_bo *bo;
- uint32_t pitch; /**< in bytes */
- GLenum internal_format;
- uint32_t dri_format;
- GLuint format; /**< mesa_format or mesa_array_format */
- uint64_t modifier; /**< fb modifier (fourcc) */
- uint32_t offset;
-
- /*
- * Need to save these here between calls to
- * image_from_names and calls to image_from_planar.
- */
- uint32_t strides[3];
- uint32_t offsets[3];
- const struct brw_image_format *planar_format;
-
- /* particular miptree level */
- GLuint width;
- GLuint height;
- GLuint tile_x;
- GLuint tile_y;
- bool has_depthstencil;
- bool imported_dmabuf;
-
- /** Offset of the auxiliary compression surface in the bo. */
- uint32_t aux_offset;
-
- /** Pitch of the auxiliary compression surface. */
- uint32_t aux_pitch;
-
- /** Total size in bytes of the auxiliary compression surface. */
- uint32_t aux_size;
-
- /**
- * Provided by EGL_EXT_image_dma_buf_import.
- * \{
- */
- enum __DRIYUVColorSpace yuv_color_space;
- enum __DRISampleRange sample_range;
- enum __DRIChromaSiting horizontal_siting;
- enum __DRIChromaSiting vertical_siting;
- /* \} */
-
- __DRIscreen *driScrnPriv;
-
- void *loader_private;
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/gl_nir.h"
-#include "compiler/glsl/gl_nir_linker.h"
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/ir_optimization.h"
-#include "compiler/glsl/program.h"
-#include "compiler/nir/nir_serialize.h"
-#include "program/program.h"
-#include "main/glspirv.h"
-#include "main/mtypes.h"
-#include "main/shaderapi.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-
-/**
- * Performs a compile of the shader stages even when we don't know
- * what non-orthogonal state will be set, in the hope that it reflects
- * the eventual NOS used, and thus allows us to produce link failures.
- */
-static bool
-brw_shader_precompile(struct gl_context *ctx,
- struct gl_shader_program *sh_prog)
-{
- struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
- struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
- struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
- struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
- if (fs && !brw_fs_precompile(ctx, fs->Program))
- return false;
-
- if (gs && !brw_gs_precompile(ctx, gs->Program))
- return false;
-
- if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
- return false;
-
- if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
- return false;
-
- if (vs && !brw_vs_precompile(ctx, vs->Program))
- return false;
-
- if (cs && !brw_cs_precompile(ctx, cs->Program))
- return false;
-
- return true;
-}
-
-static void
-brw_lower_packing_builtins(struct brw_context *brw,
- exec_list *ir)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Gens < 7 don't have instructions to convert to or from half-precision,
- * and Gens < 6 don't expose that functionality.
- */
- if (devinfo->ver != 6)
- return;
-
- lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
-}
-
-static void
-process_glsl_ir(struct brw_context *brw,
- struct gl_shader_program *shader_prog,
- struct gl_linked_shader *shader)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
-
- /* Temporary memory context for any new IR. */
- void *mem_ctx = ralloc_context(NULL);
-
- ralloc_adopt(mem_ctx, shader->ir);
-
- if (shader->Stage == MESA_SHADER_FRAGMENT) {
- lower_blend_equation_advanced(
- shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
- }
-
- /* lower_packing_builtins() inserts arithmetic instructions, so it
- * must precede lower_instructions().
- */
- brw_lower_packing_builtins(brw, shader->ir);
- do_mat_op_to_vec(shader->ir);
-
- unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
- SUB_TO_ADD_NEG |
- EXP_TO_EXP2 |
- LOG_TO_LOG2 |
- DFREXP_DLDEXP_TO_ARITH);
- if (devinfo->ver < 7) {
- instructions_to_lower |= BIT_COUNT_TO_MATH |
- EXTRACT_TO_SHIFTS |
- INSERT_TO_SHIFTS |
- REVERSE_TO_SHIFTS;
- }
-
- lower_instructions(shader->ir, instructions_to_lower);
-
- /* Pre-gfx6 HW can only nest if-statements 16 deep. Beyond this,
- * if-statements need to be flattened.
- */
- if (devinfo->ver < 6)
- lower_if_to_cond_assign(shader->Stage, shader->ir, 16);
-
- do_vec_index_to_cond_assign(shader->ir);
- lower_vector_insert(shader->ir, true);
- lower_offset_arrays(shader->ir);
- lower_quadop_vector(shader->ir, false);
-
- validate_ir_tree(shader->ir);
-
- /* Now that we've finished altering the linked IR, reparent any live IR back
- * to the permanent memory context, and free the temporary one (discarding any
- * junk we optimized away).
- */
- reparent_ir(shader->ir, shader->ir);
- ralloc_free(mem_ctx);
-
- if (ctx->_Shader->Flags & GLSL_DUMP) {
- fprintf(stderr, "\n");
- if (shader->ir) {
- fprintf(stderr, "GLSL IR for linked %s program %d:\n",
- _mesa_shader_stage_to_string(shader->Stage),
- shader_prog->Name);
- _mesa_print_ir(stderr, shader->ir, NULL);
- } else {
- fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be "
- "from cache)\n", _mesa_shader_stage_to_string(shader->Stage),
- shader_prog->Name);
- }
- fprintf(stderr, "\n");
- }
-}
-
-static void
-unify_interfaces(struct shader_info **infos)
-{
- struct shader_info *prev_info = NULL;
-
- for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
- if (!infos[i])
- continue;
-
- if (prev_info) {
- prev_info->outputs_written |= infos[i]->inputs_read &
- ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
- infos[i]->inputs_read |= prev_info->outputs_written &
- ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
-
- prev_info->patch_outputs_written |= infos[i]->patch_inputs_read;
- infos[i]->patch_inputs_read |= prev_info->patch_outputs_written;
- }
- prev_info = infos[i];
- }
-}
-
-static void
-update_xfb_info(struct gl_transform_feedback_info *xfb_info,
- struct shader_info *info)
-{
- if (!xfb_info)
- return;
-
- for (unsigned i = 0; i < xfb_info->NumOutputs; i++) {
- struct gl_transform_feedback_output *output = &xfb_info->Outputs[i];
-
- /* The VUE header contains three scalar fields packed together:
- * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
- * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
- * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
- */
- switch (output->OutputRegister) {
- case VARYING_SLOT_LAYER:
- assert(output->NumComponents == 1);
- output->OutputRegister = VARYING_SLOT_PSIZ;
- output->ComponentOffset = 1;
- break;
- case VARYING_SLOT_VIEWPORT:
- assert(output->NumComponents == 1);
- output->OutputRegister = VARYING_SLOT_PSIZ;
- output->ComponentOffset = 2;
- break;
- case VARYING_SLOT_PSIZ:
- assert(output->NumComponents == 1);
- output->ComponentOffset = 3;
- break;
- }
-
- info->outputs_written |= 1ull << output->OutputRegister;
- }
-}
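A self-contained illustration of the remap performed above, with stand-in values for Mesa's VARYING_SLOT_* enums (the real enum values differ). It shows a captured gl_Layer output ending up in component .y of the PSIZ slot:

    #include <assert.h>

    enum { SLOT_PSIZ = 1, SLOT_LAYER = 2, SLOT_VIEWPORT = 3 };   /* stand-ins */

    struct xfb_output { unsigned reg, comp_offset, num_comps; };

    /* Same remap as update_xfb_info(): scalars packed into the VUE header. */
    static void remap_vue_header(struct xfb_output *o)
    {
       switch (o->reg) {
       case SLOT_LAYER:    o->reg = SLOT_PSIZ; o->comp_offset = 1; break; /* .y */
       case SLOT_VIEWPORT: o->reg = SLOT_PSIZ; o->comp_offset = 2; break; /* .z */
       case SLOT_PSIZ:                         o->comp_offset = 3; break; /* .w */
       }
    }

    int main(void)
    {
       struct xfb_output layer = { SLOT_LAYER, 0, 1 };
       remap_vue_header(&layer);
       assert(layer.reg == SLOT_PSIZ && layer.comp_offset == 1);
       return 0;
    }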
-
-extern "C" GLboolean
-brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct brw_compiler *compiler = brw->screen->compiler;
- unsigned int stage;
- struct shader_info *infos[MESA_SHADER_STAGES] = { 0, };
-
- if (shProg->data->LinkStatus == LINKING_SKIPPED)
- return GL_TRUE;
-
- for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
- struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
- if (!shader)
- continue;
-
- struct gl_program *prog = shader->Program;
- prog->Parameters = _mesa_new_parameter_list();
-
- if (!shader->spirv_data)
- process_glsl_ir(brw, shProg, shader);
-
- _mesa_copy_linked_program_data(shProg, shader);
-
- prog->ShadowSamplers = shader->shadow_samplers;
-
- bool debug_enabled =
- INTEL_DEBUG(intel_debug_flag_for_shader_stage(shader->Stage));
-
- if (debug_enabled && shader->ir) {
- fprintf(stderr, "GLSL IR for native %s shader %d:\n",
- _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
- _mesa_print_ir(stderr, shader->ir, NULL);
- fprintf(stderr, "\n\n");
- }
-
- prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
- compiler->scalar_stage[stage]);
- }
-
- /* TODO: Verify whether it's feasible to split up the NIR linking work into a
- * per-stage part (that fills out information we need for the passes) and an
- * actual linking part, so that we could fold brw_nir_lower_resources
- * back into brw_create_nir.
- */
-
- /* SPIR-V programs use a NIR linker */
- if (shProg->data->spirv) {
- static const gl_nir_linker_options opts = {
- .fill_parameters = false,
- };
- if (!gl_nir_link_spirv(ctx, shProg, &opts))
- return GL_FALSE;
- }
-
- for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
- struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
- if (!shader)
- continue;
-
- struct gl_program *prog = shader->Program;
-
- brw_nir_lower_resources(prog->nir, shProg, prog, &brw->screen->devinfo);
-
- NIR_PASS_V(prog->nir, brw_nir_lower_gl_images, prog);
- }
-
- /* Determine first and last stage. */
- unsigned first = MESA_SHADER_STAGES;
- unsigned last = 0;
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- if (!shProg->_LinkedShaders[i])
- continue;
- if (first == MESA_SHADER_STAGES)
- first = i;
- last = i;
- }
-
- /* Linking the stages in the opposite order (from fragment to vertex)
- * ensures that inter-shader outputs written to in an earlier stage
- * are eliminated if they are (transitively) not used in a later
- * stage.
- *
- * TODO: Look into Shadow of Mordor regressions on HSW and enable this for
- * all platforms. See: https://bugs.freedesktop.org/show_bug.cgi?id=103537
- */
- if (first != last && brw->screen->devinfo.ver >= 8) {
- int next = last;
- for (int i = next - 1; i >= 0; i--) {
- if (shProg->_LinkedShaders[i] == NULL)
- continue;
-
- brw_nir_link_shaders(compiler,
- shProg->_LinkedShaders[i]->Program->nir,
- shProg->_LinkedShaders[next]->Program->nir);
- next = i;
- }
- }
-
- for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
- struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
- if (!shader)
- continue;
-
- struct gl_program *prog = shader->Program;
-
- _mesa_update_shader_textures_used(shProg, prog);
-
- brw_shader_gather_info(prog->nir, prog);
-
- NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false);
- NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo);
-
- nir_sweep(prog->nir);
-
- infos[stage] = &prog->nir->info;
-
- update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
-
- /* Make a pass over the IR to add state references for any built-in
- * uniforms that are used. This has to be done now (during linking).
- * Code generation doesn't happen until the first time this shader is
- * used for rendering. Waiting until then to generate the parameters is
- * too late. At that point, the values for the built-in uniforms won't
- * get sent to the shader.
- */
- nir_foreach_uniform_variable(var, prog->nir) {
- const nir_state_slot *const slots = var->state_slots;
- for (unsigned int i = 0; i < var->num_state_slots; i++) {
- assert(slots != NULL);
- _mesa_add_state_reference(prog->Parameters, slots[i].tokens);
- }
- }
- }
-
- /* The linker tries to dead code eliminate unused varying components,
- * and make sure interfaces match. But it isn't able to do so in all
- * cases. So, explicitly make the interfaces match by OR'ing together
- * the inputs_read/outputs_written bitfields of adjacent stages.
- */
- if (!shProg->SeparateShader)
- unify_interfaces(infos);
-
- if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- const struct gl_shader *sh = shProg->Shaders[i];
- if (!sh)
- continue;
-
- fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
- _mesa_shader_stage_to_string(sh->Stage),
- i, shProg->Name);
- fprintf(stderr, "%s", sh->Source);
- fprintf(stderr, "\n");
- }
- }
-
- if (brw->precompile && !brw_shader_precompile(ctx, shProg))
- return GL_FALSE;
-
- /* SPIR-V programs build their resource list from the linked NIR shaders. */
- if (!shProg->data->spirv)
- build_program_resource_list(ctx, shProg, false);
- else
- nir_build_program_resource_list(ctx, shProg, true);
-
- for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
- struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
- if (!shader)
- continue;
-
- /* The GLSL IR won't be needed anymore. */
- ralloc_free(shader->ir);
- shader->ir = NULL;
- }
-
- return GL_TRUE;
-}
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_fbo.h"
-#include "brw_meta_util.h"
-#include "brw_state.h"
-#include "main/blend.h"
-#include "main/fbobject.h"
-#include "util/format_srgb.h"
-
-/**
- * Helper function for handling mirror image blits.
- *
- * If coord0 > coord1, swap them and invert the "mirror" boolean.
- */
-static inline void
-fixup_mirroring(bool *mirror, float *coord0, float *coord1)
-{
- if (*coord0 > *coord1) {
- *mirror = !*mirror;
- float tmp = *coord0;
- *coord0 = *coord1;
- *coord1 = tmp;
- }
-}
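A minimal usage sketch: a blit specified right-to-left on the X axis is normalized by fixup_mirroring() into an ascending range plus a mirror flag (values are illustrative):

    bool mirror_x = false;
    float srcX0 = 100.0f, srcX1 = 0.0f;

    fixup_mirroring(&mirror_x, &srcX0, &srcX1);
    /* Now srcX0 == 0.0f, srcX1 == 100.0f and mirror_x == true. */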
-
-/**
- * Compute the number of pixels to clip for each side of a rect
- *
- * \param x0 The rect's left coordinate
- * \param y0 The rect's bottom coordinate
- * \param x1 The rect's right coordinate
- * \param y1 The rect's top coordinate
- * \param min_x The clipping region's left coordinate
- * \param min_y The clipping region's bottom coordinate
- * \param max_x The clipping region's right coordinate
- * \param max_y The clipping region's top coordinate
- * \param clipped_x0 The number of pixels to clip from the left side
- * \param clipped_y0 The number of pixels to clip from the bottom side
- * \param clipped_x1 The number of pixels to clip from the right side
- * \param clipped_y1 The number of pixels to clip from the top side
- *
- * \return false if we clip everything away, true otherwise
- */
-static inline bool
-compute_pixels_clipped(float x0, float y0, float x1, float y1,
- float min_x, float min_y, float max_x, float max_y,
- float *clipped_x0, float *clipped_y0, float *clipped_x1, float *clipped_y1)
-{
- /* If we are going to clip everything away, stop. */
- if (!(min_x <= max_x &&
- min_y <= max_y &&
- x0 <= max_x &&
- y0 <= max_y &&
- min_x <= x1 &&
- min_y <= y1 &&
- x0 <= x1 &&
- y0 <= y1)) {
- return false;
- }
-
- if (x0 < min_x)
- *clipped_x0 = min_x - x0;
- else
- *clipped_x0 = 0;
- if (max_x < x1)
- *clipped_x1 = x1 - max_x;
- else
- *clipped_x1 = 0;
-
- if (y0 < min_y)
- *clipped_y0 = min_y - y0;
- else
- *clipped_y0 = 0;
- if (max_y < y1)
- *clipped_y1 = y1 - max_y;
- else
- *clipped_y1 = 0;
-
- return true;
-}
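A worked call of the helper above, with made-up numbers: an 8,8..100,100 rect against an 80x80 clip region loses 20 pixels on the right and top and nothing on the other sides:

    float cx0, cy0, cx1, cy1;
    bool keep = compute_pixels_clipped(8, 8, 100, 100,      /* rect */
                                       0, 0, 80, 80,        /* clip region */
                                       &cx0, &cy0, &cx1, &cy1);
    /* keep == true, cx0 == 0, cy0 == 0, cx1 == 20, cy1 == 20 */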
-
-/**
- * Clips a coordinate (left, right, top or bottom) for the src or dst rect
- * (whichever requires the largest clip) and adjusts the coordinate
- * for the other rect accordingly.
- *
- * \param mirror true if mirroring is required
- * \param src the source rect coordinate (for example srcX0)
- * \param dst0 the dst rect coordinate (for example dstX0)
- * \param dst1 the opposite dst rect coordinate (for example dstX1)
- * \param clipped_src0 number of pixels to clip from the src coordinate
- * \param clipped_dst0 number of pixels to clip from the dst coordinate
- * \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
- * \param scale the src vs dst scale involved for that coordinate
- * \param isLeftOrBottom true if we are clipping the left or bottom sides
- * of the rect.
- */
-static inline void
-clip_coordinates(bool mirror,
- float *src, float *dst0, float *dst1,
- float clipped_src0,
- float clipped_dst0,
- float clipped_dst1,
- float scale,
- bool isLeftOrBottom)
-{
- /* When clipping we need to add or subtract pixels from the original
- * coordinates depending on whether we are acting on the left/bottom
- * or right/top sides of the rect respectively. We assume we have to
- * add them in the code below, and multiply by -1 when we should
- * subtract.
- */
- int mult = isLeftOrBottom ? 1 : -1;
-
- if (!mirror) {
- if (clipped_src0 >= clipped_dst0 * scale) {
- *src += clipped_src0 * mult;
- *dst0 += clipped_src0 / scale * mult;
- } else {
- *dst0 += clipped_dst0 * mult;
- *src += clipped_dst0 * scale * mult;
- }
- } else {
- if (clipped_src0 >= clipped_dst1 * scale) {
- *src += clipped_src0 * mult;
- *dst1 -= clipped_src0 / scale * mult;
- } else {
- *dst1 -= clipped_dst1 * mult;
- *src += clipped_dst1 * scale * mult;
- }
- }
-}
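A worked example of the left-side case, using the same numbers as the longer comment further down (clip_src_x0 = 5, clip_dst_x0 = 4, scale = 2, no mirroring): 4 destination pixels correspond to 8 source pixels, so the destination clip wins and the source coordinate is adjusted by 4 * 2:

    bool mirror = false;
    float srcX0 = 0.0f, dstX0 = 0.0f, dstX1 = 50.0f;

    clip_coordinates(mirror, &srcX0, &dstX0, &dstX1,
                     5.0f /* clipped_src0 */, 4.0f /* clipped_dst0 */,
                     0.0f /* clipped_dst1 */, 2.0f /* scale */,
                     true /* left side */);
    /* srcX0 == 8.0f, dstX0 == 4.0f, dstX1 is untouched. */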
-
-bool
-brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
- const struct gl_framebuffer *read_fb,
- const struct gl_framebuffer *draw_fb,
- GLfloat *srcX0, GLfloat *srcY0,
- GLfloat *srcX1, GLfloat *srcY1,
- GLfloat *dstX0, GLfloat *dstY0,
- GLfloat *dstX1, GLfloat *dstY1,
- bool *mirror_x, bool *mirror_y)
-{
- *mirror_x = false;
- *mirror_y = false;
-
- /* Detect if the blit needs to be mirrored */
- fixup_mirroring(mirror_x, srcX0, srcX1);
- fixup_mirroring(mirror_x, dstX0, dstX1);
- fixup_mirroring(mirror_y, srcY0, srcY1);
- fixup_mirroring(mirror_y, dstY0, dstY1);
-
- /* Compute number of pixels to clip for each side of both rects. Return
- * early if we are going to clip everything away.
- */
- float clip_src_x0;
- float clip_src_x1;
- float clip_src_y0;
- float clip_src_y1;
- float clip_dst_x0;
- float clip_dst_x1;
- float clip_dst_y0;
- float clip_dst_y1;
-
- if (!compute_pixels_clipped(*srcX0, *srcY0, *srcX1, *srcY1,
- 0, 0, read_fb->Width, read_fb->Height,
- &clip_src_x0, &clip_src_y0, &clip_src_x1, &clip_src_y1))
- return true;
-
- if (!compute_pixels_clipped(*dstX0, *dstY0, *dstX1, *dstY1,
- draw_fb->_Xmin, draw_fb->_Ymin, draw_fb->_Xmax, draw_fb->_Ymax,
- &clip_dst_x0, &clip_dst_y0, &clip_dst_x1, &clip_dst_y1))
- return true;
-
- /* When clipping any of the two rects we need to adjust the coordinates in
- * the other rect considering the scaling factor involved. To obtain the best
- * precision we want to make sure that we only clip once per side to avoid
- * accumulating errors due to the scaling adjustment.
- *
- * For example, if srcX0 and dstX0 need both to be clipped we want to avoid
- * the situation where we clip srcX0 first, then adjust dstX0 accordingly
- * but then we realize that the resulting dstX0 still needs to be clipped,
- * so we clip dstX0 and adjust srcX0 again. Because we are applying scaling
- * factors to adjust the coordinates in each clipping pass we lose some
- * precision and that can affect the results of the blorp blit operation
- * slightly. What we want to do here is detect the rect that we should
- * clip first for each side so that when we adjust the other rect we ensure
- * the resulting coordinate does not need to be clipped again.
- *
- * The code below implements this by comparing the number of pixels that
- * we need to clip for each side of both rects considering the scales
- * involved. For example, clip_src_x0 represents the number of pixels to be
- * clipped for the src rect's left side, so if clip_src_x0 = 5,
- * clip_dst_x0 = 4 and scaleX = 2 it means that we are clipping more from
- * the dst rect so we should clip dstX0 only and adjust srcX0. This is
- * because clipping 4 pixels in the dst is equivalent to clipping
- * 4 * 2 = 8 > 5 in the src.
- */
-
- if (*srcX0 == *srcX1 || *srcY0 == *srcY1
- || *dstX0 == *dstX1 || *dstY0 == *dstY1)
- return true;
-
- float scaleX = (float) (*srcX1 - *srcX0) / (*dstX1 - *dstX0);
- float scaleY = (float) (*srcY1 - *srcY0) / (*dstY1 - *dstY0);
-
- /* Clip left side */
- clip_coordinates(*mirror_x,
- srcX0, dstX0, dstX1,
- clip_src_x0, clip_dst_x0, clip_dst_x1,
- scaleX, true);
-
- /* Clip right side */
- clip_coordinates(*mirror_x,
- srcX1, dstX1, dstX0,
- clip_src_x1, clip_dst_x1, clip_dst_x0,
- scaleX, false);
-
- /* Clip bottom side */
- clip_coordinates(*mirror_y,
- srcY0, dstY0, dstY1,
- clip_src_y0, clip_dst_y0, clip_dst_y1,
- scaleY, true);
-
- /* Clip top side */
- clip_coordinates(*mirror_y,
- srcY1, dstY1, dstY0,
- clip_src_y1, clip_dst_y1, clip_dst_y0,
- scaleY, false);
-
- /* Account for the fact that in the system framebuffer, the origin is at
- * the lower left.
- */
- if (read_fb->FlipY) {
- GLint tmp = read_fb->Height - *srcY0;
- *srcY0 = read_fb->Height - *srcY1;
- *srcY1 = tmp;
- *mirror_y = !*mirror_y;
- }
- if (draw_fb->FlipY) {
- GLint tmp = draw_fb->Height - *dstY0;
- *dstY0 = draw_fb->Height - *dstY1;
- *dstY1 = tmp;
- *mirror_y = !*mirror_y;
- }
-
- /* Check for invalid bounds
- * Can't blit for 0-dimensions
- */
- return *srcX0 == *srcX1 || *srcY0 == *srcY1
- || *dstX0 == *dstX1 || *dstY0 == *dstY1;
-}
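The ordering rule spelled out in the long comment inside brw_meta_mirror_clip_and_scissor above can be checked with a small standalone sketch. It reuses the example numbers from that comment (clip_src_x0 = 5, clip_dst_x0 = 4, scaleX = 2) and mirrors the non-mirrored, left-side branch of clip_coordinates; it is an illustration added for this write-up, not part of the removed driver code.

/* Standalone sketch of the clip-ordering rule: clip the rect that needs
 * proportionally more clipping, then derive the other coordinate from it so
 * a second clipping pass is never required. Illustrative only.
 */
#include <stdio.h>

int main(void)
{
   float clip_src_x0 = 5.0f;   /* pixels to clip from srcX0 (example) */
   float clip_dst_x0 = 4.0f;   /* pixels to clip from dstX0 (example) */
   float scale_x     = 2.0f;   /* src pixels per dst pixel */

   float src_adjust = 0.0f, dst_adjust = 0.0f;

   if (clip_src_x0 >= clip_dst_x0 * scale_x) {
      /* The src clip dominates: clip src and derive the dst adjustment. */
      src_adjust += clip_src_x0;
      dst_adjust += clip_src_x0 / scale_x;
   } else {
      /* Clipping 4 dst pixels removes 4 * 2 = 8 src pixels > 5, so the dst
       * clip dominates: clip dst and derive the src adjustment instead.
       */
      dst_adjust += clip_dst_x0;
      src_adjust += clip_dst_x0 * scale_x;
   }

   printf("srcX0 += %.1f, dstX0 += %.1f\n", src_adjust, dst_adjust);
   return 0;
}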
-
-/**
- * Determine if fast color clear supports the given clear color.
- *
- * Fast color clear can only clear to color values of 1.0 or 0.0. At the
- * moment we only support floating point, unorm, and snorm buffers.
- */
-bool
-brw_is_color_fast_clear_compatible(struct brw_context *brw,
- const struct brw_mipmap_tree *mt,
- const union gl_color_union *color)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct gl_context *ctx = &brw->ctx;
-
- /* If we're mapping the render format to a different format than the
- * format we use for texturing then it is a bit questionable whether it
- * should be possible to use a fast clear. Although we only actually
- * render using a renderable format, without the override workaround it
- * wouldn't be possible to have a non-renderable surface in a fast clear
- * state so the hardware probably legitimately doesn't need to support
- * this case. At least on Gfx9 this really does seem to cause problems.
- */
- if (devinfo->ver >= 9 &&
- brw_isl_format_for_mesa_format(mt->format) !=
- brw->mesa_to_isl_render_format[mt->format])
- return false;
-
- const mesa_format format = _mesa_get_render_format(ctx, mt->format);
- if (_mesa_is_format_integer_color(format)) {
- if (devinfo->ver >= 8) {
- perf_debug("Integer fast clear not enabled for (%s)",
- _mesa_get_format_name(format));
- }
- return false;
- }
-
- for (int i = 0; i < 4; i++) {
- if (!_mesa_format_has_color_component(format, i)) {
- continue;
- }
-
- if (devinfo->ver < 9 &&
- color->f[i] != 0.0f && color->f[i] != 1.0f) {
- return false;
- }
- }
- return true;
-}
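A minimal sketch of the pre-Gfx9 rule stated in the comment above: fast clear can only handle per-channel values of exactly 0.0 or 1.0. The helper below ignores the format's missing-component handling and is illustrative only, not the driver's implementation.

#include <stdbool.h>
#include <stdio.h>

/* Returns true when every channel is exactly 0.0 or 1.0. */
static bool color_is_fast_clear_compatible_pre_gfx9(const float color[4])
{
   for (int i = 0; i < 4; i++) {
      if (color[i] != 0.0f && color[i] != 1.0f)
         return false;
   }
   return true;
}

int main(void)
{
   const float white[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
   const float grey[4]  = { 0.5f, 0.5f, 0.5f, 1.0f };
   printf("%d %d\n", color_is_fast_clear_compatible_pre_gfx9(white),  /* 1 */
                     color_is_fast_clear_compatible_pre_gfx9(grey));  /* 0 */
   return 0;
}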
-
-/**
- * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
- * SURFACE_STATE (DWORD 12-15 on SKL+).
- */
-union isl_color_value
-brw_meta_convert_fast_clear_color(const struct brw_context *brw,
- const struct brw_mipmap_tree *mt,
- const union gl_color_union *color)
-{
- union isl_color_value override_color = {
- .u32 = {
- color->ui[0],
- color->ui[1],
- color->ui[2],
- color->ui[3],
- },
- };
-
- /* The sampler doesn't look at the format of the surface when the fast
- * clear color is used so we need to implement luminance, intensity and
- * missing components manually.
- */
- switch (_mesa_get_format_base_format(mt->format)) {
- case GL_INTENSITY:
- override_color.u32[3] = override_color.u32[0];
- FALLTHROUGH;
- case GL_LUMINANCE:
- case GL_LUMINANCE_ALPHA:
- override_color.u32[1] = override_color.u32[0];
- override_color.u32[2] = override_color.u32[0];
- break;
- default:
- for (int i = 0; i < 3; i++) {
- if (!_mesa_format_has_color_component(mt->format, i))
- override_color.u32[i] = 0;
- }
- break;
- }
-
- switch (_mesa_get_format_datatype(mt->format)) {
- case GL_UNSIGNED_NORMALIZED:
- for (int i = 0; i < 4; i++)
- override_color.f32[i] = SATURATE(override_color.f32[i]);
- break;
-
- case GL_SIGNED_NORMALIZED:
- for (int i = 0; i < 4; i++)
- override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
- break;
-
- case GL_UNSIGNED_INT:
- for (int i = 0; i < 4; i++) {
- unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
- if (bits < 32) {
- uint32_t max = (1u << bits) - 1;
- override_color.u32[i] = MIN2(override_color.u32[i], max);
- }
- }
- break;
-
- case GL_INT:
- for (int i = 0; i < 4; i++) {
- unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
- if (bits < 32) {
- int32_t max = (1 << (bits - 1)) - 1;
- int32_t min = -(1 << (bits - 1));
- override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
- }
- }
- break;
-
- case GL_FLOAT:
- if (!_mesa_is_format_signed(mt->format)) {
- for (int i = 0; i < 4; i++)
- override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
- }
- break;
- }
-
- if (!_mesa_format_has_color_component(mt->format, 3)) {
- if (_mesa_is_format_integer_color(mt->format))
- override_color.u32[3] = 1;
- else
- override_color.f32[3] = 1.0f;
- }
-
- /* Handle linear to SRGB conversion */
- if (brw->ctx.Color.sRGBEnabled &&
- _mesa_get_srgb_format_linear(mt->format) != mt->format) {
- for (int i = 0; i < 3; i++) {
- override_color.f32[i] =
- util_format_linear_to_srgb_float(override_color.f32[i]);
- }
- }
-
- return override_color;
-}
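The integer paths above clamp each channel to what the surface's bit width can represent. The sketch below reproduces that clamping in isolation; the 10-bit channel width and the input values are made-up examples, not anything taken from the driver.

#include <stdint.h>
#include <stdio.h>

static uint32_t clamp_uint_channel(uint32_t value, unsigned bits)
{
   if (bits >= 32)
      return value;
   uint32_t max = (1u << bits) - 1;        /* e.g. 1023 for 10 bits */
   return value < max ? value : max;
}

static int32_t clamp_int_channel(int32_t value, unsigned bits)
{
   if (bits >= 32)
      return value;
   int32_t max = (1 << (bits - 1)) - 1;    /* e.g.  511 for 10 bits */
   int32_t min = -(1 << (bits - 1));       /* e.g. -512 for 10 bits */
   return value < min ? min : (value > max ? max : value);
}

int main(void)
{
   printf("%u\n", clamp_uint_channel(5000u, 10));  /* prints 1023 */
   printf("%d\n", clamp_int_channel(-9000, 10));   /* prints -512 */
   return 0;
}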
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_META_UTIL_H
-#define BRW_META_UTIL_H
-
-#include <stdbool.h>
-#include "main/mtypes.h"
-#include "brw_mipmap_tree.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-bool
-brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
- const struct gl_framebuffer *read_fb,
- const struct gl_framebuffer *draw_fb,
- GLfloat *srcX0, GLfloat *srcY0,
- GLfloat *srcX1, GLfloat *srcY1,
- GLfloat *dstX0, GLfloat *dstY0,
- GLfloat *dstX1, GLfloat *dstY1,
- bool *mirror_x, bool *mirror_y);
-
-union isl_color_value
-brw_meta_convert_fast_clear_color(const struct brw_context *brw,
- const struct brw_mipmap_tree *mt,
- const union gl_color_union *color);
-
-bool
-brw_is_color_fast_clear_compatible(struct brw_context *brw,
- const struct brw_mipmap_tree *mt,
- const union gl_color_union *color);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BRW_META_UTIL_H */
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <GL/gl.h>
-#include <GL/internal/dri_interface.h>
-#include "drm-uapi/drm_fourcc.h"
-
-#include "brw_batch.h"
-#include "brw_image.h"
-#include "brw_mipmap_tree.h"
-#include "brw_tex.h"
-#include "brw_blit.h"
-#include "brw_fbo.h"
-
-#include "brw_blorp.h"
-#include "brw_context.h"
-#include "brw_state.h"
-
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/formats.h"
-#include "main/glformats.h"
-#include "main/texcompress_etc.h"
-#include "main/teximage.h"
-#include "main/streaming-load-memcpy.h"
-
-#include "util/format_srgb.h"
-#include "util/u_memory.h"
-
-#include "x86/common_x86_asm.h"
-
-#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-
-static void *brw_miptree_map_raw(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- GLbitfield mode);
-
-static void brw_miptree_unmap_raw(struct brw_mipmap_tree *mt);
-
-/**
- * Return true if the format that will be used to access the miptree is
- * CCS_E-compatible with the miptree's linear/non-sRGB format.
- *
- * Why use the linear format? Well, although the miptree may be specified with
- * an sRGB format, the usage of that color space/format can be toggled. Since
- * our HW tends to support more linear formats than sRGB ones, we use this
- * format variant to check for CCS_E compatibility.
- */
-static bool
-format_ccs_e_compat_with_miptree(const struct intel_device_info *devinfo,
- const struct brw_mipmap_tree *mt,
- enum isl_format access_format)
-{
- assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E);
-
- mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
- enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
- return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);
-}
-
-/* Determine if CCS_E is supported for a given platform and mesa format. */
-static bool
-format_supports_ccs_e(const struct brw_context *brw, mesa_format format)
-{
- /* For now compression is only enabled for integer formats even though
- * some floating point formats are also supported. This is a heuristic
- * decision based on current public benchmarks: none of the tested cases
- * showed any improvement for those formats, and a few were seen to regress.
- * Hence these are left to be enabled in the future when they are known
- * to improve things.
- */
- if (_mesa_get_format_datatype(format) == GL_FLOAT)
- return false;
-
- /* Many window system buffers are sRGB even if they are never rendered as
- * sRGB. For those, we want CCS_E for when sRGBEncode is false. When the
- * surface is used as sRGB, we fall back to CCS_D.
- */
- mesa_format linear_format = _mesa_get_srgb_format_linear(format);
- enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
- return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
-}
-
-/**
- * Determine depth format corresponding to a depth+stencil format,
- * for separate stencil.
- */
-mesa_format
-brw_depth_format_for_depthstencil_format(mesa_format format) {
- switch (format) {
- case MESA_FORMAT_Z24_UNORM_S8_UINT:
- return MESA_FORMAT_Z24_UNORM_X8_UINT;
- case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
- return MESA_FORMAT_Z_FLOAT32;
- default:
- return format;
- }
-}
-
-static bool
-create_mapping_table(GLenum target, unsigned first_level, unsigned last_level,
- unsigned depth0, struct brw_mipmap_level *table)
-{
- for (unsigned level = first_level; level <= last_level; level++) {
- const unsigned d =
- target == GL_TEXTURE_3D ? minify(depth0, level) : depth0;
-
- table[level].slice = calloc(d, sizeof(*table[0].slice));
- if (!table[level].slice)
- goto unwind;
- }
-
- return true;
-
-unwind:
- for (unsigned level = first_level; level <= last_level; level++)
- free(table[level].slice);
-
- return false;
-}
-
-static bool
-needs_separate_stencil(const struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- mesa_format format)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL)
- return false;
-
- if (devinfo->must_use_separate_stencil)
- return true;
-
- return brw->has_separate_stencil && brw->has_hiz;
-}
-
-/**
- * Choose the aux usage for this miptree. This function must be called fairly
- * late in the miptree create process after we have a tiling.
- */
-static void
-brw_miptree_choose_aux_usage(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- assert(mt->aux_usage == ISL_AUX_USAGE_NONE);
-
- if (_mesa_is_format_color_format(mt->format)) {
- if (mt->surf.samples > 1) {
- mt->aux_usage = ISL_AUX_USAGE_MCS;
- } else if (!INTEL_DEBUG(DEBUG_NO_RBC) &&
- format_supports_ccs_e(brw, mt->format)) {
- mt->aux_usage = ISL_AUX_USAGE_CCS_E;
- } else if (brw->mesa_format_supports_render[mt->format]) {
- mt->aux_usage = ISL_AUX_USAGE_CCS_D;
- }
- } else if (isl_surf_usage_is_depth(mt->surf.usage) && brw->has_hiz) {
- mt->aux_usage = ISL_AUX_USAGE_HIZ;
- }
-
- /* We can do fast-clear on all auxiliary surface types that are
- * allocated through the normal texture creation paths.
- */
- if (mt->aux_usage != ISL_AUX_USAGE_NONE)
- mt->supports_fast_clear = true;
-}
-
-
-/**
- * Choose an appropriate uncompressed format for a requested
- * compressed format, if unsupported.
- */
-mesa_format
-brw_lower_compressed_format(struct brw_context *brw, mesa_format format)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* No need to lower ETC formats on these platforms,
- * they are supported natively.
- */
- if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT)
- return format;
-
- switch (format) {
- case MESA_FORMAT_ETC1_RGB8:
- return MESA_FORMAT_R8G8B8X8_UNORM;
- case MESA_FORMAT_ETC2_RGB8:
- return MESA_FORMAT_R8G8B8X8_UNORM;
- case MESA_FORMAT_ETC2_SRGB8:
- case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
- case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
- return MESA_FORMAT_B8G8R8A8_SRGB;
- case MESA_FORMAT_ETC2_RGBA8_EAC:
- case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
- return MESA_FORMAT_R8G8B8A8_UNORM;
- case MESA_FORMAT_ETC2_R11_EAC:
- return MESA_FORMAT_R_UNORM16;
- case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
- return MESA_FORMAT_R_SNORM16;
- case MESA_FORMAT_ETC2_RG11_EAC:
- return MESA_FORMAT_RG_UNORM16;
- case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
- return MESA_FORMAT_RG_SNORM16;
- default:
- /* Non ETC1 / ETC2 format */
- return format;
- }
-}
-
-unsigned
-brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level)
-{
- if (mt->surf.dim == ISL_SURF_DIM_3D)
- return minify(mt->surf.logical_level0_px.depth, level);
- else
- return mt->surf.logical_level0_px.array_len;
-}
-
-UNUSED static unsigned
-get_num_phys_layers(const struct isl_surf *surf, unsigned level)
-{
- /* For physical dimensions the layout also needs to be considered.
- * See isl_calc_phys_level0_extent_sa().
- */
- if (surf->dim != ISL_SURF_DIM_3D)
- return surf->phys_level0_sa.array_len;
-
- if (surf->dim_layout == ISL_DIM_LAYOUT_GFX4_2D)
- return minify(surf->phys_level0_sa.array_len, level);
-
- return minify(surf->phys_level0_sa.depth, level);
-}
-
-/** \brief Assert that the level and layer are valid for the miptree. */
-void
-brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt,
- uint32_t level,
- uint32_t layer)
-{
- (void) mt;
- (void) level;
- (void) layer;
-
- assert(level >= mt->first_level);
- assert(level <= mt->last_level);
- assert(layer < get_num_phys_layers(&mt->surf, level));
-}
-
-static enum isl_aux_state **
-create_aux_state_map(struct brw_mipmap_tree *mt,
- enum isl_aux_state initial)
-{
- const uint32_t levels = mt->last_level + 1;
-
- uint32_t total_slices = 0;
- for (uint32_t level = 0; level < levels; level++)
- total_slices += brw_get_num_logical_layers(mt, level);
-
- const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *);
-
- /* We're going to allocate a single chunk of data for both the per-level
- * reference array and the arrays of aux_state. This makes cleanup
- * significantly easier.
- */
- const size_t total_size = per_level_array_size +
- total_slices * sizeof(enum isl_aux_state);
- void *data = malloc(total_size);
- if (data == NULL)
- return NULL;
-
- enum isl_aux_state **per_level_arr = data;
- enum isl_aux_state *s = data + per_level_array_size;
- for (uint32_t level = 0; level < levels; level++) {
- per_level_arr[level] = s;
- const unsigned level_layers = brw_get_num_logical_layers(mt, level);
- for (uint32_t a = 0; a < level_layers; a++)
- *(s++) = initial;
- }
- assert((void *)s == data + total_size);
-
- return per_level_arr;
-}
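create_aux_state_map above packs the per-level pointer array and all per-layer states into a single allocation so that one free() releases everything. A minimal standalone sketch of the same layout, with made-up level and layer counts:

#include <stdlib.h>
#include <stdio.h>

int main(void)
{
   const unsigned levels = 3;
   const unsigned layers_per_level[3] = { 4, 2, 1 };

   unsigned total = 0;
   for (unsigned l = 0; l < levels; l++)
      total += layers_per_level[l];

   /* One chunk: the per-level pointer array, then all per-layer states. */
   const size_t ptr_array_size = levels * sizeof(int *);
   void *data = malloc(ptr_array_size + total * sizeof(int));
   if (!data)
      return 1;

   int **per_level = data;
   int *slot = (int *)((char *)data + ptr_array_size);
   for (unsigned l = 0; l < levels; l++) {
      per_level[l] = slot;
      for (unsigned a = 0; a < layers_per_level[l]; a++)
         *slot++ = 0;                      /* initial state */
   }

   per_level[1][1] = 7;                    /* states are addressed [level][layer] */
   printf("%d\n", per_level[1][1]);        /* prints 7 */

   free(data);                             /* one free covers pointers and states */
   return 0;
}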
-
-static void
-free_aux_state_map(enum isl_aux_state **state)
-{
- free(state);
-}
-
-static bool
-need_to_retile_as_linear(struct brw_context *brw, unsigned blt_pitch,
- enum isl_tiling tiling, unsigned samples)
-{
- if (samples > 1)
- return false;
-
- if (tiling == ISL_TILING_LINEAR)
- return false;
-
- if (blt_pitch >= 32768) {
- perf_debug("blt pitch %u too large to blit, falling back to untiled",
- blt_pitch);
- return true;
- }
-
- return false;
-}
-
-static bool
-need_to_retile_as_x(const struct brw_context *brw, uint64_t size,
- enum isl_tiling tiling)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* If the BO is too large to fit in the aperture, we need to use the
- * BLT engine to support it. Prior to Sandybridge, the BLT paths can't
- * handle Y-tiling, so we need to fall back to X.
- */
- if (devinfo->ver < 6 && size >= brw->max_gtt_map_object_size &&
- tiling == ISL_TILING_Y0)
- return true;
-
- return false;
-}
-
-static struct brw_mipmap_tree *
-make_surface(struct brw_context *brw, GLenum target, mesa_format format,
- unsigned first_level, unsigned last_level,
- unsigned width0, unsigned height0, unsigned depth0,
- unsigned num_samples, isl_tiling_flags_t tiling_flags,
- isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags,
- unsigned row_pitch_B, struct brw_bo *bo)
-{
- struct brw_mipmap_tree *mt = calloc(sizeof(*mt), 1);
- if (!mt)
- return NULL;
-
- if (!create_mapping_table(target, first_level, last_level, depth0,
- mt->level)) {
- free(mt);
- return NULL;
- }
-
- mt->refcount = 1;
-
- if (target == GL_TEXTURE_CUBE_MAP ||
- target == GL_TEXTURE_CUBE_MAP_ARRAY)
- isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;
-
- DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
- __func__,
- _mesa_enum_to_string(target),
- _mesa_get_format_name(format),
- num_samples, width0, height0, depth0,
- first_level, last_level, mt);
-
- struct isl_surf_init_info init_info = {
- .dim = get_isl_surf_dim(target),
- .format = translate_tex_format(brw, format, false),
- .width = width0,
- .height = height0,
- .depth = target == GL_TEXTURE_3D ? depth0 : 1,
- .levels = last_level - first_level + 1,
- .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
- .samples = num_samples,
- .row_pitch_B = row_pitch_B,
- .usage = isl_usage_flags,
- .tiling_flags = tiling_flags,
- };
-
- if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
- goto fail;
-
- /* Depth surfaces are always Y-tiled and stencil is always W-tiled, although
- * on gfx7 platforms we also need to create Y-tiled copies of stencil for
- * texturing since the hardware can't sample from W-tiled surfaces. For
- * everything else, check for corner cases needing special treatment.
- */
- bool is_depth_stencil =
- mt->surf.usage & (ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_DEPTH_BIT);
- if (!is_depth_stencil) {
- if (need_to_retile_as_linear(brw, brw_miptree_blt_pitch(mt),
- mt->surf.tiling, mt->surf.samples)) {
- init_info.tiling_flags = 1u << ISL_TILING_LINEAR;
- if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
- goto fail;
- } else if (need_to_retile_as_x(brw, mt->surf.size_B, mt->surf.tiling)) {
- init_info.tiling_flags = 1u << ISL_TILING_X;
- if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info))
- goto fail;
- }
- }
-
- /* For linear surfaces the buffer gets padded by a fixed 64 bytes, so the
- * size may not be a multiple of row_pitch.
- * See isl_apply_surface_padding().
- */
- if (mt->surf.tiling != ISL_TILING_LINEAR)
- assert(mt->surf.size_B % mt->surf.row_pitch_B == 0);
-
- if (!bo) {
- mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
- mt->surf.size_B,
- BRW_MEMZONE_OTHER,
- isl_tiling_to_i915_tiling(
- mt->surf.tiling),
- mt->surf.row_pitch_B, alloc_flags);
- if (!mt->bo)
- goto fail;
- } else {
- mt->bo = bo;
- }
-
- mt->first_level = first_level;
- mt->last_level = last_level;
- mt->target = target;
- mt->format = format;
- mt->aux_state = NULL;
- mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8;
- mt->compressed = _mesa_is_format_compressed(format);
- mt->drm_modifier = DRM_FORMAT_MOD_INVALID;
-
- return mt;
-
-fail:
- brw_miptree_release(&mt);
- return NULL;
-}
-
-/* Return the usual surface usage flags for the given format. */
-static isl_surf_usage_flags_t
-mt_surf_usage(mesa_format format)
-{
- switch(_mesa_get_format_base_format(format)) {
- case GL_DEPTH_COMPONENT:
- return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
- case GL_DEPTH_STENCIL:
- return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
- ISL_SURF_USAGE_TEXTURE_BIT;
- case GL_STENCIL_INDEX:
- return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
- default:
- return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
- }
-}
-
-static struct brw_mipmap_tree *
-miptree_create(struct brw_context *brw,
- GLenum target,
- mesa_format format,
- GLuint first_level,
- GLuint last_level,
- GLuint width0,
- GLuint height0,
- GLuint depth0,
- GLuint num_samples,
- enum brw_miptree_create_flags flags)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const uint32_t alloc_flags =
- (flags & MIPTREE_CREATE_BUSY || num_samples > 1) ? BO_ALLOC_BUSY : 0;
- isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK;
-
- /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */
- if (devinfo->ver < 6 && _mesa_is_format_color_format(format))
- tiling_flags &= ~ISL_TILING_Y0_BIT;
-
- mesa_format mt_fmt = format;
- if (!_mesa_is_format_color_format(format) && devinfo->ver >= 6) {
- /* Fix up the Z miptree format for how we're splitting out separate
- * stencil. Gfx7 expects there to be no stencil bits in its depth buffer.
- */
- mt_fmt = brw_depth_format_for_depthstencil_format(format);
- }
-
- struct brw_mipmap_tree *mt =
- make_surface(brw, target, mt_fmt, first_level, last_level,
- width0, height0, depth0, num_samples,
- tiling_flags, mt_surf_usage(mt_fmt),
- alloc_flags, 0, NULL);
-
- if (mt == NULL)
- return NULL;
-
- if (brw_miptree_needs_fake_etc(brw, mt)) {
- mesa_format decomp_format = brw_lower_compressed_format(brw, format);
- mt->shadow_mt = make_surface(brw, target, decomp_format, first_level,
- last_level, width0, height0, depth0,
- num_samples, tiling_flags,
- mt_surf_usage(decomp_format),
- alloc_flags, 0, NULL);
-
- if (mt->shadow_mt == NULL) {
- brw_miptree_release(&mt);
- return NULL;
- }
- }
-
- if (needs_separate_stencil(brw, mt, format)) {
- mt->stencil_mt =
- make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level, last_level,
- width0, height0, depth0, num_samples,
- ISL_TILING_W_BIT, mt_surf_usage(MESA_FORMAT_S_UINT8),
- alloc_flags, 0, NULL);
- if (mt->stencil_mt == NULL) {
- brw_miptree_release(&mt);
- return NULL;
- }
- }
-
- if (!(flags & MIPTREE_CREATE_NO_AUX))
- brw_miptree_choose_aux_usage(brw, mt);
-
- return mt;
-}
-
-struct brw_mipmap_tree *
-brw_miptree_create(struct brw_context *brw,
- GLenum target,
- mesa_format format,
- GLuint first_level,
- GLuint last_level,
- GLuint width0,
- GLuint height0,
- GLuint depth0,
- GLuint num_samples,
- enum brw_miptree_create_flags flags)
-{
- assert(num_samples > 0);
-
- struct brw_mipmap_tree *mt = miptree_create(
- brw, target, format,
- first_level, last_level,
- width0, height0, depth0, num_samples,
- flags);
- if (!mt)
- return NULL;
-
- mt->offset = 0;
-
- /* Create the auxiliary surface up-front for aux usages that compress more
- * than just clear color. CCS_D can only compress clear color, so we wait
- * until an actual fast-clear to allocate it.
- */
- if (mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
- !brw_miptree_alloc_aux(brw, mt)) {
- mt->aux_usage = ISL_AUX_USAGE_NONE;
- mt->supports_fast_clear = false;
- }
-
- return mt;
-}
-
-struct brw_mipmap_tree *
-brw_miptree_create_for_bo(struct brw_context *brw,
- struct brw_bo *bo,
- mesa_format format,
- uint32_t offset,
- uint32_t width,
- uint32_t height,
- uint32_t depth,
- int pitch,
- enum isl_tiling tiling,
- enum brw_miptree_create_flags flags)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_mipmap_tree *mt;
- const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
- const GLenum base_format = _mesa_get_format_base_format(format);
-
- if ((base_format == GL_DEPTH_COMPONENT ||
- base_format == GL_DEPTH_STENCIL)) {
- const mesa_format mt_fmt = (devinfo->ver < 6) ? format :
- brw_depth_format_for_depthstencil_format(format);
- mt = make_surface(brw, target, mt_fmt,
- 0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT,
- mt_surf_usage(mt_fmt),
- 0, pitch, bo);
- if (!mt)
- return NULL;
-
- brw_bo_reference(bo);
-
- if (!(flags & MIPTREE_CREATE_NO_AUX))
- brw_miptree_choose_aux_usage(brw, mt);
-
- return mt;
- } else if (format == MESA_FORMAT_S_UINT8) {
- mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
- 0, 0, width, height, depth, 1,
- ISL_TILING_W_BIT,
- mt_surf_usage(MESA_FORMAT_S_UINT8),
- 0, pitch, bo);
- if (!mt)
- return NULL;
-
- assert(bo->size >= mt->surf.size_B);
-
- brw_bo_reference(bo);
- return mt;
- }
-
- /* Nothing will be able to use this miptree with the BO if the offset isn't
- * aligned.
- */
- if (tiling != ISL_TILING_LINEAR)
- assert(offset % 4096 == 0);
-
- /* miptrees can't handle negative pitch. If you need flipping of images,
- * that's outside of the scope of the mt.
- */
- assert(pitch >= 0);
-
- mt = make_surface(brw, target, format,
- 0, 0, width, height, depth, 1,
- 1lu << tiling,
- mt_surf_usage(format),
- 0, pitch, bo);
- if (!mt)
- return NULL;
-
- brw_bo_reference(bo);
- mt->bo = bo;
- mt->offset = offset;
-
- if (!(flags & MIPTREE_CREATE_NO_AUX)) {
- brw_miptree_choose_aux_usage(brw, mt);
-
- /* Create the auxiliary surface up-front for aux usages that compress more
- * than just clear color. CCS_D can only compress clear color, so we wait
- * until an actual fast-clear to allocate it.
- */
- if (mt->aux_usage != ISL_AUX_USAGE_CCS_D &&
- !brw_miptree_alloc_aux(brw, mt)) {
- mt->aux_usage = ISL_AUX_USAGE_NONE;
- mt->supports_fast_clear = false;
- }
- }
-
- return mt;
-}
-
-static struct brw_mipmap_tree *
-miptree_create_for_planar_image(struct brw_context *brw,
- __DRIimage *image, GLenum target,
- enum isl_tiling tiling)
-{
- const struct brw_image_format *f = image->planar_format;
- struct brw_mipmap_tree *planar_mt = NULL;
-
- for (int i = 0; i < f->nplanes; i++) {
- const int index = f->planes[i].buffer_index;
- const uint32_t dri_format = f->planes[i].dri_format;
- const mesa_format format = driImageFormatToGLFormat(dri_format);
- const uint32_t width = image->width >> f->planes[i].width_shift;
- const uint32_t height = image->height >> f->planes[i].height_shift;
-
- /* Disable creation of the texture's aux buffers because the driver
- * exposes no EGL API to manage them. That is, there is no API for
- * resolving the aux buffer's content to the main buffer nor for
- * invalidating the aux buffer's content.
- */
- struct brw_mipmap_tree *mt =
- brw_miptree_create_for_bo(brw, image->bo, format,
- image->offsets[index],
- width, height, 1,
- image->strides[index],
- tiling,
- MIPTREE_CREATE_NO_AUX);
- if (mt == NULL) {
- brw_miptree_release(&planar_mt);
- return NULL;
- }
-
- mt->target = target;
-
- if (i == 0)
- planar_mt = mt;
- else
- planar_mt->plane[i - 1] = mt;
- }
-
- planar_mt->drm_modifier = image->modifier;
-
- return planar_mt;
-}
-
-static bool
-create_ccs_buf_for_image(struct brw_context *brw,
- __DRIimage *image,
- struct brw_mipmap_tree *mt,
- enum isl_aux_state initial_state)
-{
- struct isl_surf temp_ccs_surf = {0,};
-
- /* CCS is only supported for very simple miptrees */
- assert(image->aux_offset != 0 && image->aux_pitch != 0);
- assert(image->tile_x == 0 && image->tile_y == 0);
- assert(mt->surf.samples == 1);
- assert(mt->surf.levels == 1);
- assert(mt->surf.logical_level0_px.depth == 1);
- assert(mt->surf.logical_level0_px.array_len == 1);
- assert(mt->first_level == 0);
- assert(mt->last_level == 0);
-
- /* We shouldn't already have a CCS */
- assert(!mt->aux_buf);
-
- if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, NULL,
- &temp_ccs_surf, image->aux_pitch))
- return false;
-
- assert(image->aux_offset < image->bo->size);
- assert(temp_ccs_surf.size_B <= image->bo->size - image->aux_offset);
-
- mt->aux_buf = calloc(sizeof(*mt->aux_buf), 1);
- if (mt->aux_buf == NULL)
- return false;
-
- mt->aux_state = create_aux_state_map(mt, initial_state);
- if (!mt->aux_state) {
- free(mt->aux_buf);
- mt->aux_buf = NULL;
- return false;
- }
-
- /* On gfx10+ we start using an extra space in the aux buffer to store the
- * indirect clear color. However, if we imported an image from the window
- * system with CCS, we don't have the extra space at the end of the aux
- * buffer. So create a new bo here that will store that clear color.
- */
- if (brw->isl_dev.ss.clear_color_state_size > 0) {
- mt->aux_buf->clear_color_bo =
- brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo",
- brw->isl_dev.ss.clear_color_state_size,
- BRW_MEMZONE_OTHER, I915_TILING_NONE, 0,
- BO_ALLOC_ZEROED);
- if (!mt->aux_buf->clear_color_bo) {
- free(mt->aux_buf);
- mt->aux_buf = NULL;
- return false;
- }
- }
-
- mt->aux_buf->bo = image->bo;
- brw_bo_reference(image->bo);
-
- mt->aux_buf->offset = image->aux_offset;
- mt->aux_buf->surf = temp_ccs_surf;
-
- return true;
-}
-
-struct brw_mipmap_tree *
-brw_miptree_create_for_dri_image(struct brw_context *brw,
- __DRIimage *image, GLenum target,
- mesa_format format,
- bool allow_internal_aux)
-{
- uint32_t bo_tiling, bo_swizzle;
- brw_bo_get_tiling(image->bo, &bo_tiling, &bo_swizzle);
-
- const struct isl_drm_modifier_info *mod_info =
- isl_drm_modifier_get_info(image->modifier);
-
- const enum isl_tiling tiling =
- mod_info ? mod_info->tiling : isl_tiling_from_i915_tiling(bo_tiling);
-
- if (image->planar_format && image->planar_format->nplanes > 1)
- return miptree_create_for_planar_image(brw, image, target, tiling);
-
- if (image->planar_format)
- assert(image->planar_format->planes[0].dri_format == image->dri_format);
-
- if (!brw->ctx.TextureFormatSupported[format]) {
- /* The texture storage paths in core Mesa detect if the driver does not
- * support the user-requested format, and then searches for a
- * fallback format. The DRIimage code bypasses core Mesa, though. So we
- * do the fallbacks here for important formats.
- *
- * We must support DRM_FOURCC_XBGR8888 textures because the Android
- * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which
- * the Chrome OS compositor consumes as dma_buf EGLImages.
- */
- format = _mesa_format_fallback_rgbx_to_rgba(format);
- }
-
- if (!brw->ctx.TextureFormatSupported[format])
- return NULL;
-
- enum brw_miptree_create_flags mt_create_flags = 0;
-
- /* If this image comes in from a window system, we have different
- * requirements than if it comes in via an EGL import operation. Window
- * system images can use any form of auxiliary compression we wish because
- * they get "flushed" before being handed off to the window system and we
- * have the opportunity to do resolves. Non window-system images, on the
- * other hand, have no resolve point so we can't have aux without a
- * modifier.
- */
- if (!allow_internal_aux)
- mt_create_flags |= MIPTREE_CREATE_NO_AUX;
-
- /* If we have a modifier which specifies aux, don't create one yet */
- if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE)
- mt_create_flags |= MIPTREE_CREATE_NO_AUX;
-
- /* Disable creation of the texture's aux buffers because the driver exposes
- * no EGL API to manage them. That is, there is no API for resolving the aux
- * buffer's content to the main buffer nor for invalidating the aux buffer's
- * content.
- */
- struct brw_mipmap_tree *mt =
- brw_miptree_create_for_bo(brw, image->bo, format,
- image->offset, image->width, image->height, 1,
- image->pitch, tiling, mt_create_flags);
- if (mt == NULL)
- return NULL;
-
- mt->target = target;
- mt->level[0].level_x = image->tile_x;
- mt->level[0].level_y = image->tile_y;
- mt->drm_modifier = image->modifier;
-
- /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
- * for EGL images from non-tile-aligned surfaces on gfx4 hw and earlier, which
- * have trouble resolving back to the destination image due to alignment issues.
- */
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- if (!devinfo->has_surface_tile_offset) {
- uint32_t draw_x, draw_y;
- brw_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);
-
- if (draw_x != 0 || draw_y != 0) {
- _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
- brw_miptree_release(&mt);
- return NULL;
- }
- }
-
- if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) {
- assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E);
-
- mt->aux_usage = mod_info->aux_usage;
- /* If we are a window system buffer, then we can support fast-clears
- * even if the modifier doesn't support them by doing a partial resolve
- * as part of the flush operation.
- */
- mt->supports_fast_clear =
- allow_internal_aux || mod_info->supports_clear_color;
-
- /* We don't know the actual state of the surface when we get it but we
- * can make a pretty good guess based on the modifier. What we do know
- * for sure is that it isn't in the AUX_INVALID state, so we just assume
- * a worst case of compression.
- */
- enum isl_aux_state initial_state =
- isl_drm_modifier_get_default_aux_state(image->modifier);
-
- if (!create_ccs_buf_for_image(brw, image, mt, initial_state)) {
- brw_miptree_release(&mt);
- return NULL;
- }
- }
-
- /* Don't assume coherency for imported EGLimages. We don't know what
- * external clients are going to do with it. They may scan it out.
- */
- image->bo->cache_coherent = false;
-
- return mt;
-}
-
-/**
- * For a singlesample renderbuffer, this simply wraps the given BO with a
- * miptree.
- *
- * For a multisample renderbuffer, this wraps the window system's
- * (singlesample) BO with a singlesample miptree attached to the
- * brw_renderbuffer, then creates a multisample miptree attached to irb->mt
- * that will contain the actual rendering (which is lazily resolved to
- * irb->singlesample_mt).
- */
-bool
-brw_update_winsys_renderbuffer_miptree(struct brw_context *intel,
- struct brw_renderbuffer *irb,
- struct brw_mipmap_tree *singlesample_mt,
- uint32_t width, uint32_t height,
- uint32_t pitch)
-{
- struct brw_mipmap_tree *multisample_mt = NULL;
- struct gl_renderbuffer *rb = &irb->Base.Base;
- mesa_format format = rb->Format;
- const unsigned num_samples = MAX2(rb->NumSamples, 1);
-
- /* Only the front and back buffers, which are color buffers, are allocated
- * through the image loader.
- */
- assert(_mesa_get_format_base_format(format) == GL_RGB ||
- _mesa_get_format_base_format(format) == GL_RGBA);
-
- assert(singlesample_mt);
-
- if (num_samples == 1) {
- brw_miptree_release(&irb->mt);
- irb->mt = singlesample_mt;
-
- assert(!irb->singlesample_mt);
- } else {
- brw_miptree_release(&irb->singlesample_mt);
- irb->singlesample_mt = singlesample_mt;
-
- if (!irb->mt ||
- irb->mt->surf.logical_level0_px.width != width ||
- irb->mt->surf.logical_level0_px.height != height) {
- multisample_mt = brw_miptree_create_for_renderbuffer(intel,
- format,
- width,
- height,
- num_samples);
- if (!multisample_mt)
- goto fail;
-
- irb->need_downsample = false;
- brw_miptree_release(&irb->mt);
- irb->mt = multisample_mt;
- }
- }
- return true;
-
-fail:
- brw_miptree_release(&irb->mt);
- return false;
-}
-
-struct brw_mipmap_tree*
-brw_miptree_create_for_renderbuffer(struct brw_context *brw,
- mesa_format format,
- uint32_t width,
- uint32_t height,
- uint32_t num_samples)
-{
- struct brw_mipmap_tree *mt;
- uint32_t depth = 1;
- GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
-
- mt = brw_miptree_create(brw, target, format, 0, 0,
- width, height, depth, num_samples,
- MIPTREE_CREATE_BUSY);
- if (!mt)
- goto fail;
-
- return mt;
-
-fail:
- brw_miptree_release(&mt);
- return NULL;
-}
-
-void
-brw_miptree_reference(struct brw_mipmap_tree **dst,
- struct brw_mipmap_tree *src)
-{
- if (*dst == src)
- return;
-
- brw_miptree_release(dst);
-
- if (src) {
- src->refcount++;
- DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
- }
-
- *dst = src;
-}
-
-static void
-brw_miptree_aux_buffer_free(struct brw_miptree_aux_buffer *aux_buf)
-{
- if (aux_buf == NULL)
- return;
-
- brw_bo_unreference(aux_buf->bo);
- brw_bo_unreference(aux_buf->clear_color_bo);
-
- free(aux_buf);
-}
-
-void
-brw_miptree_release(struct brw_mipmap_tree **mt)
-{
- if (!*mt)
- return;
-
- DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
- if (--(*mt)->refcount <= 0) {
- GLuint i;
-
- DBG("%s deleting %p\n", __func__, *mt);
-
- brw_bo_unreference((*mt)->bo);
- brw_miptree_release(&(*mt)->stencil_mt);
- brw_miptree_release(&(*mt)->shadow_mt);
- brw_miptree_aux_buffer_free((*mt)->aux_buf);
- free_aux_state_map((*mt)->aux_state);
-
- brw_miptree_release(&(*mt)->plane[0]);
- brw_miptree_release(&(*mt)->plane[1]);
-
- for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
- free((*mt)->level[i].slice);
- }
-
- free(*mt);
- }
- *mt = NULL;
-}
-
-
-void
-brw_get_image_dims(struct gl_texture_image *image,
- int *width, int *height, int *depth)
-{
- switch (image->TexObject->Target) {
- case GL_TEXTURE_1D_ARRAY:
- /* For a 1D Array texture the OpenGL API will treat the image height as
- * the number of array slices. For Intel hardware, we treat the 1D array
- * as a 2D Array with a height of 1. So, here we want to swap image
- * height and depth.
- */
- assert(image->Depth == 1);
- *width = image->Width;
- *height = 1;
- *depth = image->Height;
- break;
- case GL_TEXTURE_CUBE_MAP:
- /* For Cube maps, the mesa/main api layer gives us a depth of 1 even
- * though we really have 6 slices.
- */
- assert(image->Depth == 1);
- *width = image->Width;
- *height = image->Height;
- *depth = 6;
- break;
- default:
- *width = image->Width;
- *height = image->Height;
- *depth = image->Depth;
- break;
- }
-}
-
-/**
- * Can the image be pulled into a unified mipmap tree? This mirrors
- * the completeness test in a lot of ways.
- *
- * Not sure whether I want to pass gl_texture_image here.
- */
-bool
-brw_miptree_match_image(struct brw_mipmap_tree *mt,
- struct gl_texture_image *image)
-{
- struct brw_texture_image *brw_image = brw_texture_image(image);
- GLuint level = brw_image->base.Base.Level;
- int width, height, depth;
-
- /* glTexImage* choose the texture object based on the target passed in, and
- * objects can't change targets over their lifetimes, so this should be
- * true.
- */
- assert(image->TexObject->Target == mt->target);
-
- mesa_format mt_format = mt->format;
- if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
- mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
- if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
- mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
-
- if (_mesa_get_srgb_format_linear(image->TexFormat) !=
- _mesa_get_srgb_format_linear(mt_format))
- return false;
-
- brw_get_image_dims(image, &width, &height, &depth);
-
- if (mt->target == GL_TEXTURE_CUBE_MAP)
- depth = 6;
-
- if (level >= mt->surf.levels)
- return false;
-
- const unsigned level_depth =
- mt->surf.dim == ISL_SURF_DIM_3D ?
- minify(mt->surf.logical_level0_px.depth, level) :
- mt->surf.logical_level0_px.array_len;
-
- return width == minify(mt->surf.logical_level0_px.width, level) &&
- height == minify(mt->surf.logical_level0_px.height, level) &&
- depth == level_depth &&
- MAX2(image->NumSamples, 1) == mt->surf.samples;
-}
-
-void
-brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt,
- GLuint level, GLuint slice,
- GLuint *x, GLuint *y)
-{
- if (level == 0 && slice == 0) {
- *x = mt->level[0].level_x;
- *y = mt->level[0].level_y;
- return;
- }
-
- uint32_t x_offset_sa, y_offset_sa, z_offset_sa, array_offset;
-
- /* Miptree itself can have an offset only if it represents a single
- * slice in an imported buffer object.
- * See brw_miptree_create_for_dri_image().
- */
- assert(mt->level[0].level_x == 0);
- assert(mt->level[0].level_y == 0);
-
- /* The given level is relative to level zero while the miptree may
- * represent just a subset of all levels starting from 'first_level'.
- */
- assert(level >= mt->first_level);
- level -= mt->first_level;
-
- const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0;
- slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice;
- isl_surf_get_image_offset_el(&mt->surf, level, slice, z,
- &x_offset_sa, &y_offset_sa,
- &z_offset_sa, &array_offset);
-
- *x = x_offset_sa;
- *y = y_offset_sa;
- assert(z_offset_sa == 0);
- assert(array_offset == 0);
-}
-
-/**
- * Compute the offset (in bytes) from the start of the BO to the given x
- * and y coordinate. For tiled BOs, caller must ensure that x and y are
- * multiples of the tile size.
- */
-uint32_t
-brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt,
- uint32_t x, uint32_t y)
-{
- int cpp = mt->cpp;
- uint32_t pitch = mt->surf.row_pitch_B;
-
- switch (mt->surf.tiling) {
- default:
- unreachable("not reached");
- case ISL_TILING_LINEAR:
- return y * pitch + x * cpp;
- case ISL_TILING_X:
- assert((x % (512 / cpp)) == 0);
- assert((y % 8) == 0);
- return y * pitch + x / (512 / cpp) * 4096;
- case ISL_TILING_Y0:
- assert((x % (128 / cpp)) == 0);
- assert((y % 32) == 0);
- return y * pitch + x / (128 / cpp) * 4096;
- }
-}
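The X-tiled case above relies on an X tile being 512 bytes wide and 8 rows tall (4096 bytes total), so with cpp = 4 a tile covers 128 pixels horizontally. A small worked example of that arithmetic, using made-up coordinates and pitch; it is an illustration, not driver code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t x_tiled_aligned_offset(uint32_t x, uint32_t y,
                                       uint32_t pitch, uint32_t cpp)
{
   assert(x % (512 / cpp) == 0);   /* x must sit on a tile boundary */
   assert(y % 8 == 0);             /* y must sit on a tile-row boundary */
   return y * pitch + x / (512 / cpp) * 4096;
}

int main(void)
{
   /* x = 256 px, y = 16 rows, pitch = 4096 B, cpp = 4:
    * 16 * 4096 + (256 / 128) * 4096 = 65536 + 8192 = 73728.
    */
   printf("%u\n", x_tiled_aligned_offset(256, 16, 4096, 4));
   return 0;
}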
-
-/**
- * Rendering with tiled buffers requires that the base address of the buffer
- * be aligned to a page boundary. For renderbuffers, and sometimes with
- * textures, we may want the surface to point at a texture image level that
- * isn't at a page boundary.
- *
- * This function returns an appropriately-aligned base offset
- * according to the tiling restrictions, plus any required x/y offset
- * from there.
- */
-uint32_t
-brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt,
- GLuint level, GLuint slice,
- uint32_t *tile_x,
- uint32_t *tile_y)
-{
- uint32_t x, y;
- uint32_t mask_x, mask_y;
-
- isl_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y);
- brw_miptree_get_image_offset(mt, level, slice, &x, &y);
-
- *tile_x = x & mask_x;
- *tile_y = y & mask_y;
-
- return brw_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y);
-}
-
-static void
-brw_miptree_copy_slice_sw(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_layer,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_layer,
- unsigned width, unsigned height)
-{
- void *src, *dst;
- ptrdiff_t src_stride, dst_stride;
- const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8);
-
- brw_miptree_map(brw, src_mt,
- src_level, src_layer,
- 0, 0,
- width, height,
- GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
- &src, &src_stride);
-
- brw_miptree_map(brw, dst_mt,
- dst_level, dst_layer,
- 0, 0,
- width, height,
- GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
- BRW_MAP_DIRECT_BIT,
- &dst, &dst_stride);
-
- DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
- _mesa_get_format_name(src_mt->format),
- src_mt, src, src_stride,
- _mesa_get_format_name(dst_mt->format),
- dst_mt, dst, dst_stride,
- width, height);
-
- int row_size = cpp * width;
- if (src_stride == row_size &&
- dst_stride == row_size) {
- memcpy(dst, src, row_size * height);
- } else {
- for (int i = 0; i < height; i++) {
- memcpy(dst, src, row_size);
- dst += dst_stride;
- src += src_stride;
- }
- }
-
- brw_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
- brw_miptree_unmap(brw, src_mt, src_level, src_layer);
-
- /* Don't forget to copy the stencil data over, too. We could have skipped
- * passing BRW_MAP_DIRECT_BIT, but that would have meant brw_miptree_map
- * shuffling the two data sources in/out of temporary storage instead of
- * the direct mapping we get this way.
- */
- if (dst_mt->stencil_mt) {
- assert(src_mt->stencil_mt);
- brw_miptree_copy_slice_sw(brw,
- src_mt->stencil_mt, src_level, src_layer,
- dst_mt->stencil_mt, dst_level, dst_layer,
- width, height);
- }
-}
-
-void
-brw_miptree_copy_slice(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_layer,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_layer)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- mesa_format format = src_mt->format;
- unsigned width = minify(src_mt->surf.phys_level0_sa.width,
- src_level - src_mt->first_level);
- unsigned height = minify(src_mt->surf.phys_level0_sa.height,
- src_level - src_mt->first_level);
-
- assert(src_layer < get_num_phys_layers(&src_mt->surf,
- src_level - src_mt->first_level));
-
- assert(_mesa_get_srgb_format_linear(src_mt->format) ==
- _mesa_get_srgb_format_linear(dst_mt->format));
-
- DBG("validate blit mt %s %p %d,%d -> mt %s %p %d,%d (%dx%d)\n",
- _mesa_get_format_name(src_mt->format),
- src_mt, src_level, src_layer,
- _mesa_get_format_name(dst_mt->format),
- dst_mt, dst_level, dst_layer,
- width, height);
-
- if (devinfo->ver >= 6) {
- /* On gfx6 and above, we just use blorp. It's faster than the blitter
- * and can handle everything without software fallbacks.
- */
- brw_blorp_copy_miptrees(brw,
- src_mt, src_level, src_layer,
- dst_mt, dst_level, dst_layer,
- 0, 0, 0, 0, width, height);
-
- if (src_mt->stencil_mt) {
- assert(dst_mt->stencil_mt);
- brw_blorp_copy_miptrees(brw,
- src_mt->stencil_mt, src_level, src_layer,
- dst_mt->stencil_mt, dst_level, dst_layer,
- 0, 0, 0, 0, width, height);
- }
- return;
- }
-
- if (dst_mt->compressed) {
- unsigned int i, j;
- _mesa_get_format_block_size(dst_mt->format, &i, &j);
- height = ALIGN_NPOT(height, j) / j;
- width = ALIGN_NPOT(width, i) / i;
- }
-
- /* Gfx4-5 doesn't support separate stencil */
- assert(!src_mt->stencil_mt);
-
- uint32_t dst_x, dst_y, src_x, src_y;
- brw_miptree_get_image_offset(dst_mt, dst_level, dst_layer, &dst_x, &dst_y);
- brw_miptree_get_image_offset(src_mt, src_level, src_layer, &src_x, &src_y);
-
- DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
- _mesa_get_format_name(src_mt->format),
- src_mt, src_x, src_y, src_mt->surf.row_pitch_B,
- _mesa_get_format_name(dst_mt->format),
- dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch_B,
- width, height);
-
- if (!brw_miptree_blit(brw,
- src_mt, src_level, src_layer, 0, 0, false,
- dst_mt, dst_level, dst_layer, 0, 0, false,
- width, height, COLOR_LOGICOP_COPY)) {
- perf_debug("miptree validate blit for %s failed\n",
- _mesa_get_format_name(format));
-
- brw_miptree_copy_slice_sw(brw,
- src_mt, src_level, src_layer,
- dst_mt, dst_level, dst_layer,
- width, height);
- }
-}
-
-/**
- * Copies the image's current data to the given miptree, and associates that
- * miptree with the image.
- */
-void
-brw_miptree_copy_teximage(struct brw_context *brw,
- struct brw_texture_image *brw_image,
- struct brw_mipmap_tree *dst_mt)
-{
- struct brw_mipmap_tree *src_mt = brw_image->mt;
- struct brw_texture_object *intel_obj =
- brw_texture_object(brw_image->base.Base.TexObject);
- int level = brw_image->base.Base.Level;
- const unsigned face = brw_image->base.Base.Face;
- unsigned start_layer, end_layer;
-
- if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
- assert(face == 0);
- assert(brw_image->base.Base.Height);
- start_layer = 0;
- end_layer = brw_image->base.Base.Height - 1;
- } else if (face > 0) {
- start_layer = face;
- end_layer = face;
- } else {
- assert(brw_image->base.Base.Depth);
- start_layer = 0;
- end_layer = brw_image->base.Base.Depth - 1;
- }
-
- for (unsigned i = start_layer; i <= end_layer; i++) {
- brw_miptree_copy_slice(brw, src_mt, level, i, dst_mt, level, i);
- }
-
- brw_miptree_reference(&brw_image->mt, dst_mt);
- intel_obj->needs_validate = true;
-}
-
-static struct brw_miptree_aux_buffer *
-brw_alloc_aux_buffer(struct brw_context *brw,
- const struct isl_surf *aux_surf,
- bool wants_memset,
- uint8_t memset_value)
-{
- struct brw_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
- if (!buf)
- return NULL;
-
- uint64_t size = aux_surf->size_B;
-
- const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size > 0;
- if (has_indirect_clear) {
- /* On CNL+, instead of setting the clear color in the SURFACE_STATE, we
- * will set a pointer to a dword somewhere that contains the color. So,
- * allocate the space for the clear color value here on the aux buffer.
- */
- buf->clear_color_offset = size;
- size += brw->isl_dev.ss.clear_color_state_size;
- }
-
- /* If the buffer needs to be initialised (requiring the buffer to be
- * immediately mapped to cpu space for writing), do not use the gpu access
- * flag, which can cause an unnecessary delay if the backing pages happen
- * to still be in use by the GPU.
- */
- const bool alloc_zeroed = wants_memset && memset_value == 0;
- const bool needs_memset =
- !alloc_zeroed && (wants_memset || has_indirect_clear);
- const uint32_t alloc_flags =
- alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 : BO_ALLOC_BUSY);
-
- /* ISL has a stricter set of alignment rules than the drm allocator.
- * Therefore one can pass the ISL dimensions in terms of bytes instead of
- * trying to recalculate based on different format block sizes.
- */
- buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", size,
- BRW_MEMZONE_OTHER, I915_TILING_Y,
- aux_surf->row_pitch_B, alloc_flags);
- if (!buf->bo) {
- free(buf);
- return NULL;
- }
-
- /* Initialize the bo to the desired value */
- if (needs_memset) {
- assert(!(alloc_flags & BO_ALLOC_BUSY));
-
- void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW);
- if (map == NULL) {
- brw_miptree_aux_buffer_free(buf);
- return NULL;
- }
-
- /* Memset the aux_surf portion of the BO. */
- if (wants_memset)
- memset(map, memset_value, aux_surf->size_B);
-
- /* Zero the indirect clear color to match ::fast_clear_color. */
- if (has_indirect_clear) {
- memset((char *)map + buf->clear_color_offset, 0,
- brw->isl_dev.ss.clear_color_state_size);
- }
-
- brw_bo_unmap(buf->bo);
- }
-
- if (has_indirect_clear) {
- buf->clear_color_bo = buf->bo;
- brw_bo_reference(buf->clear_color_bo);
- }
-
- buf->surf = *aux_surf;
-
- return buf;
-}
-
-
-/**
- * Helper for brw_miptree_alloc_aux() that sets
- * \c mt->level[level].has_hiz. Return true if and only if
- * \c has_hiz was set.
- */
-static bool
-brw_miptree_level_enable_hiz(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t level)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(mt->aux_buf);
- assert(mt->surf.size_B > 0);
-
- if (devinfo->verx10 >= 75) {
- uint32_t width = minify(mt->surf.phys_level0_sa.width, level);
- uint32_t height = minify(mt->surf.phys_level0_sa.height, level);
-
- /* Disable HiZ for LOD > 0 unless the width is 8 aligned
- * and the height is 4 aligned. This allows our HiZ support
- * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
- * we can grow the width & height to allow the HiZ op to
- * force the proper size alignments.
- */
- if (level > 0 && ((width & 7) || (height & 3))) {
- DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
- return false;
- }
- }
-
- DBG("mt %p level %d: HiZ enabled\n", mt, level);
- mt->level[level].has_hiz = true;
- return true;
-}
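A tiny sketch of the Haswell-era alignment rule described above: HiZ stays enabled for LOD 0 (which can be grown to satisfy HiZ ops), and for higher LODs only when the minified width is a multiple of 8 and the minified height a multiple of 4. The miplevel sizes are made up and the helper is illustrative only, not the driver's code:

#include <stdbool.h>
#include <stdio.h>

static unsigned minify(unsigned value, unsigned level)
{
   unsigned v = value >> level;
   return v ? v : 1;
}

static bool level_allows_hiz(unsigned width0, unsigned height0, unsigned level)
{
   if (level == 0)
      return true;                /* LOD 0 can be grown to fit HiZ ops */
   unsigned w = minify(width0, level);
   unsigned h = minify(height0, level);
   return (w & 7) == 0 && (h & 3) == 0;
}

int main(void)
{
   printf("%d\n", level_allows_hiz(256, 128, 1));  /* 128x64 -> 1 (enabled)  */
   printf("%d\n", level_allows_hiz(100, 60, 1));   /* 50x30  -> 0 (disabled) */
   return 0;
}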
-
-
-/**
- * Allocate the initial aux surface for a miptree based on mt->aux_usage
- *
- * Since MCS, HiZ, and CCS_E can compress more than just clear color, we
- * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only
- * compress clear color so we wait until an actual fast-clear to allocate it.
- */
-bool
-brw_miptree_alloc_aux(struct brw_context *brw, struct brw_mipmap_tree *mt)
-{
- assert(mt->aux_buf == NULL);
-
- /* Get the aux buf allocation parameters for this miptree. */
- enum isl_aux_state initial_state;
- uint8_t memset_value;
- struct isl_surf aux_surf = {0,};
- bool aux_surf_ok = false;
-
- switch (mt->aux_usage) {
- case ISL_AUX_USAGE_NONE:
- aux_surf.size_B = 0;
- aux_surf_ok = true;
- break;
- case ISL_AUX_USAGE_HIZ:
- initial_state = ISL_AUX_STATE_AUX_INVALID;
- memset_value = 0;
- aux_surf_ok = isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &aux_surf);
- break;
- case ISL_AUX_USAGE_MCS:
- /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
- *
- * When MCS buffer is enabled and bound to MSRT, it is required that
- * it is cleared prior to any rendering.
- *
- * Since we don't use the MCS buffer for any purpose other than
- * rendering, it makes sense to just clear it immediately upon
- * allocation.
- *
- * Note: the clear value for MCS buffers is all 1's, so we memset to
- * 0xff.
- */
- initial_state = ISL_AUX_STATE_CLEAR;
- memset_value = 0xFF;
- aux_surf_ok = isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &aux_surf);
- break;
- case ISL_AUX_USAGE_CCS_D:
- case ISL_AUX_USAGE_CCS_E:
- /* When CCS_E is used, we need to ensure that the CCS starts off in a
- * valid state. From the Sky Lake PRM, "MCS Buffer for Render
- * Target(s)":
- *
- * "If Software wants to enable Color Compression without Fast
- * clear, Software needs to initialize MCS with zeros."
- *
- * A CCS value of 0 indicates that the corresponding block is in the
- * pass-through state which is what we want.
- *
- * For CCS_D, do the same thing. On gfx9+, this avoids having any
- * undefined bits in the aux buffer.
- */
- initial_state = ISL_AUX_STATE_PASS_THROUGH;
- memset_value = 0;
- aux_surf_ok =
- isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, NULL, &aux_surf, 0);
- break;
-
- default:
- unreachable("Invalid aux usage");
- }
-
- /* We should have a valid aux_surf. */
- if (!aux_surf_ok)
- return false;
-
- /* No work is needed for a zero-sized auxiliary buffer. */
- if (aux_surf.size_B == 0)
- return true;
-
- /* Create the aux_state for the auxiliary buffer. */
- mt->aux_state = create_aux_state_map(mt, initial_state);
- if (mt->aux_state == NULL)
- return false;
-
- /* Allocate the auxiliary buffer. */
- const bool needs_memset = initial_state != ISL_AUX_STATE_AUX_INVALID;
- mt->aux_buf = brw_alloc_aux_buffer(brw, &aux_surf, needs_memset,
- memset_value);
- if (mt->aux_buf == NULL) {
- free_aux_state_map(mt->aux_state);
- mt->aux_state = NULL;
- return false;
- }
-
- /* Perform aux_usage-specific initialization. */
- if (mt->aux_usage == ISL_AUX_USAGE_HIZ) {
- for (unsigned level = mt->first_level; level <= mt->last_level; ++level)
- brw_miptree_level_enable_hiz(brw, mt, level);
- }
-
- return true;
-}
-
-
-/**
- * Can the miptree sample using the hiz buffer?
- */
-bool
-brw_miptree_sample_with_hiz(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (!devinfo->has_sample_with_hiz) {
- return false;
- }
-
- if (!mt->aux_buf) {
- return false;
- }
-
- for (unsigned level = 0; level < mt->surf.levels; ++level) {
- if (!brw_miptree_level_has_hiz(mt, level))
- return false;
- }
-
- /* From the BDW PRM (Volume 2d: Command Reference: Structures
- * RENDER_SURFACE_STATE.AuxiliarySurfaceMode):
- *
- * "If this field is set to AUX_HIZ, Number of Multisamples must be
- * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
- *
- * There is no such blurb for 1D textures, but there is sufficient evidence
- * that this is broken on SKL+.
- */
- return (mt->surf.samples == 1 &&
- mt->target != GL_TEXTURE_3D &&
- mt->target != GL_TEXTURE_1D /* gfx9+ restriction */);
-}
-
-static bool
-level_has_aux(const struct brw_mipmap_tree *mt, uint32_t level)
-{
- return isl_aux_usage_has_hiz(mt->aux_usage) ?
- brw_miptree_level_has_hiz(mt, level) :
- mt->aux_usage != ISL_AUX_USAGE_NONE && mt->aux_buf;
-}
-
-/**
- * Does the miptree slice have hiz enabled?
- */
-bool
-brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level)
-{
- brw_miptree_check_level_layer(mt, level, 0);
- return mt->level[level].has_hiz;
-}
-
-static inline uint32_t
-miptree_level_range_length(const struct brw_mipmap_tree *mt,
- uint32_t start_level, uint32_t num_levels)
-{
- assert(start_level >= mt->first_level);
- assert(start_level <= mt->last_level);
-
- if (num_levels == INTEL_REMAINING_LAYERS)
- num_levels = mt->last_level - start_level + 1;
- /* Check for overflow */
- assert(start_level + num_levels >= start_level);
- assert(start_level + num_levels <= mt->last_level + 1);
-
- return num_levels;
-}
-
-static inline uint32_t
-miptree_layer_range_length(const struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t num_layers)
-{
- assert(level <= mt->last_level);
-
- const uint32_t total_num_layers = brw_get_num_logical_layers(mt, level);
- assert(start_layer < total_num_layers);
- if (num_layers == INTEL_REMAINING_LAYERS)
- num_layers = total_num_layers - start_layer;
- /* Check for overflow */
- assert(start_layer + num_layers >= start_layer);
- assert(start_layer + num_layers <= total_num_layers);
-
- return num_layers;
-}
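
For reference, the two range helpers above share one clamping pattern for the "remaining" sentinels. Below is a minimal standalone sketch of that pattern; REMAINING is a hypothetical stand-in for INTEL_REMAINING_LEVELS/INTEL_REMAINING_LAYERS and the exact bound checks are simplified.

#include <assert.h>
#include <stdint.h>

#define REMAINING UINT32_MAX   /* stand-in for INTEL_REMAINING_LEVELS/LAYERS */

/* Clamp a (start, count) range against a total, treating REMAINING as
 * "everything from start to the end", as the helpers above do.
 */
static uint32_t
clamp_range(uint32_t total, uint32_t start, uint32_t count)
{
   assert(start < total);
   if (count == REMAINING)
      count = total - start;
   assert(start + count >= start);   /* no overflow */
   assert(start + count <= total);   /* stays within the miptree */
   return count;
}

int main(void)
{
   assert(clamp_range(6, 2, REMAINING) == 4);   /* layers 2..5 */
   assert(clamp_range(6, 0, 3) == 3);           /* explicit count kept */
   return 0;
}
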
-
-bool
-brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt,
- unsigned start_level, unsigned num_levels,
- unsigned start_layer, unsigned num_layers)
-{
- assert(_mesa_is_format_color_format(mt->format));
-
- if (!mt->aux_buf)
- return false;
-
- /* Clamp the level range to fit the miptree */
- num_levels = miptree_level_range_length(mt, start_level, num_levels);
-
- for (uint32_t l = 0; l < num_levels; l++) {
- const uint32_t level = start_level + l;
- const uint32_t level_layers =
- miptree_layer_range_length(mt, level, start_layer, num_layers);
- for (unsigned a = 0; a < level_layers; a++) {
- enum isl_aux_state aux_state =
- brw_miptree_get_aux_state(mt, level, start_layer + a);
- assert(aux_state != ISL_AUX_STATE_AUX_INVALID);
- if (aux_state != ISL_AUX_STATE_PASS_THROUGH)
- return true;
- }
- }
-
- return false;
-}
-
-static void
-brw_miptree_check_color_resolve(const struct brw_context *brw,
- const struct brw_mipmap_tree *mt,
- unsigned level, unsigned layer)
-{
- if (!mt->aux_buf)
- return;
-
- /* Fast color clear is supported for mipmapped surfaces only on Gfx8+. */
- assert(brw->screen->devinfo.ver >= 8 ||
- (level == 0 && mt->first_level == 0 && mt->last_level == 0));
-
- /* Compression of arrayed msaa surfaces is supported. */
- if (mt->surf.samples > 1)
- return;
-
- /* Fast color clear is supported for non-msaa arrays only on Gfx8+. */
- assert(brw->screen->devinfo.ver >= 8 ||
- (layer == 0 &&
- mt->surf.logical_level0_px.depth == 1 &&
- mt->surf.logical_level0_px.array_len == 1));
-
- (void)level;
- (void)layer;
-}
-
-void
-brw_miptree_prepare_access(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t start_level, uint32_t num_levels,
- uint32_t start_layer, uint32_t num_layers,
- enum isl_aux_usage aux_usage,
- bool fast_clear_supported)
-{
- const uint32_t clamped_levels =
- miptree_level_range_length(mt, start_level, num_levels);
- for (uint32_t l = 0; l < clamped_levels; l++) {
- const uint32_t level = start_level + l;
- if (!level_has_aux(mt, level))
- continue;
-
- const uint32_t level_layers =
- miptree_layer_range_length(mt, level, start_layer, num_layers);
- for (uint32_t a = 0; a < level_layers; a++) {
- const uint32_t layer = start_layer + a;
- const enum isl_aux_state aux_state =
- brw_miptree_get_aux_state(mt, level, layer);
- const enum isl_aux_op aux_op =
- isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported);
-
- if (aux_op == ISL_AUX_OP_NONE) {
- /* Nothing to do here. */
- } else if (isl_aux_usage_has_mcs(mt->aux_usage)) {
- assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE);
- brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
- } else if (isl_aux_usage_has_hiz(mt->aux_usage)) {
- brw_hiz_exec(brw, mt, level, layer, 1, aux_op);
- } else {
- assert(isl_aux_usage_has_ccs(mt->aux_usage));
- brw_miptree_check_color_resolve(brw, mt, level, layer);
- brw_blorp_resolve_color(brw, mt, level, layer, aux_op);
- }
-
- const enum isl_aux_state new_state =
- isl_aux_state_transition_aux_op(aux_state, mt->aux_usage, aux_op);
- brw_miptree_set_aux_state(brw, mt, level, layer, 1, new_state);
- }
- }
-}
-
-void
-brw_miptree_finish_write(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t num_layers,
- enum isl_aux_usage aux_usage)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (mt->format == MESA_FORMAT_S_UINT8 && devinfo->ver <= 7) {
- mt->shadow_needs_update = true;
- } else if (brw_miptree_has_etc_shadow(brw, mt)) {
- mt->shadow_needs_update = true;
- }
-
- if (!level_has_aux(mt, level))
- return;
-
- const uint32_t level_layers =
- miptree_layer_range_length(mt, level, start_layer, num_layers);
-
- for (uint32_t a = 0; a < level_layers; a++) {
- const uint32_t layer = start_layer + a;
- const enum isl_aux_state aux_state =
- brw_miptree_get_aux_state(mt, level, layer);
- const enum isl_aux_state new_aux_state =
- isl_aux_state_transition_write(aux_state, aux_usage, false);
- brw_miptree_set_aux_state(brw, mt, level, layer, 1, new_aux_state);
- }
-}
-
-enum isl_aux_state
-brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt,
- uint32_t level, uint32_t layer)
-{
- brw_miptree_check_level_layer(mt, level, layer);
-
- if (_mesa_is_format_color_format(mt->format)) {
- assert(mt->aux_buf != NULL);
- assert(mt->surf.samples == 1 ||
- mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
- } else if (mt->format == MESA_FORMAT_S_UINT8) {
- unreachable("Cannot get aux state for stencil");
- } else {
- assert(brw_miptree_level_has_hiz(mt, level));
- }
-
- return mt->aux_state[level][layer];
-}
-
-void
-brw_miptree_set_aux_state(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t num_layers,
- enum isl_aux_state aux_state)
-{
- num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers);
-
- if (_mesa_is_format_color_format(mt->format)) {
- assert(mt->aux_buf != NULL);
- assert(mt->surf.samples == 1 ||
- mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
- } else if (mt->format == MESA_FORMAT_S_UINT8) {
- unreachable("Cannot set aux state for stencil");
- } else {
- assert(brw_miptree_level_has_hiz(mt, level));
- }
-
- for (unsigned a = 0; a < num_layers; a++) {
- if (mt->aux_state[level][start_layer + a] != aux_state) {
- mt->aux_state[level][start_layer + a] = aux_state;
- brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
- }
- }
-}
-
-/* On Gfx9 color buffers may be compressed by the hardware (lossless
- * compression). There are, however, format restrictions and care needs to be
- * taken that the sampler engine is capable of re-interpreting a buffer with a
- * format different from the one it was originally written with.
- *
- * For example, SRGB formats are not compressible and the sampler engine isn't
- * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
- * color buffer needs to be resolved so that the sampling surface can be
- * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
- * set).
- */
-static bool
-can_texture_with_ccs(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format view_format)
-{
- if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
- return false;
-
- if (!format_ccs_e_compat_with_miptree(&brw->screen->devinfo,
- mt, view_format)) {
- perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
- isl_format_get_name(view_format),
- _mesa_get_format_name(mt->format));
- return false;
- }
-
- return true;
-}
-
-enum isl_aux_usage
-brw_miptree_texture_aux_usage(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format view_format,
- enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits)
-{
- assert(brw->screen->devinfo.ver == 9 || astc5x5_wa_bits == 0);
-
- /* On gfx9, ASTC 5x5 textures cannot live in the sampler cache alongside
- * CCS or HiZ compressed textures. See gfx9_apply_astc5x5_wa_flush() for
- * details.
- */
- if ((astc5x5_wa_bits & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
- mt->aux_usage != ISL_AUX_USAGE_MCS)
- return ISL_AUX_USAGE_NONE;
-
- switch (mt->aux_usage) {
- case ISL_AUX_USAGE_HIZ:
- if (brw_miptree_sample_with_hiz(brw, mt))
- return ISL_AUX_USAGE_HIZ;
- break;
-
- case ISL_AUX_USAGE_MCS:
- return ISL_AUX_USAGE_MCS;
-
- case ISL_AUX_USAGE_CCS_D:
- case ISL_AUX_USAGE_CCS_E:
- if (!mt->aux_buf) {
- assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
- return ISL_AUX_USAGE_NONE;
- }
-
- /* If we don't have any unresolved color, report an aux usage of
- * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the
- * aux surface and we can save some bandwidth.
- */
- if (!brw_miptree_has_color_unresolved(mt, 0, INTEL_REMAINING_LEVELS,
- 0, INTEL_REMAINING_LAYERS))
- return ISL_AUX_USAGE_NONE;
-
- if (can_texture_with_ccs(brw, mt, view_format))
- return ISL_AUX_USAGE_CCS_E;
- break;
-
- default:
- break;
- }
-
- return ISL_AUX_USAGE_NONE;
-}
-
-static bool
-isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
-{
- /* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear
- * values so sRGB curve application was a no-op for all fast-clearable
- * formats.
- *
- * On gfx9+, the hardware supports arbitrary clear values. For sRGB clear
- * values, the hardware interprets the floats, not as what would be
- * returned from the sampler (or written by the shader), but as being
- * between format conversion and sRGB curve application. This means that
- * we can switch between sRGB and UNORM without having to whack the clear
- * color.
- */
- return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
-}
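
To illustrate the rule above with a toy example: two formats are treated as fast-clear compatible exactly when they collapse to the same linear format. The FMT_* enum and srgb_to_linear helper below are hypothetical stand-ins, not the real ISL API.

#include <assert.h>
#include <stdbool.h>

enum fmt { FMT_RGBA8_UNORM, FMT_RGBA8_SRGB, FMT_BGRA8_UNORM };

/* Map an sRGB format to its linear counterpart; linear formats map to
 * themselves. */
static enum fmt srgb_to_linear(enum fmt f)
{
   return f == FMT_RGBA8_SRGB ? FMT_RGBA8_UNORM : f;
}

static bool fast_clear_compatible(enum fmt a, enum fmt b)
{
   return srgb_to_linear(a) == srgb_to_linear(b);
}

int main(void)
{
   assert(fast_clear_compatible(FMT_RGBA8_UNORM, FMT_RGBA8_SRGB));   /* same base */
   assert(!fast_clear_compatible(FMT_RGBA8_UNORM, FMT_BGRA8_UNORM)); /* different */
   return 0;
}
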
-
-void
-brw_miptree_prepare_texture(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format view_format,
- uint32_t start_level, uint32_t num_levels,
- uint32_t start_layer, uint32_t num_layers,
- enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits)
-{
- enum isl_aux_usage aux_usage =
- brw_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits);
-
- bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
-
- /* Clear color is specified as ints or floats and the conversion is done by
- * the sampler. If we have a texture view, we would have to perform the
- * clear color conversion manually. Just disable clear color.
- */
- if (!isl_formats_are_fast_clear_compatible(mt->surf.format, view_format))
- clear_supported = false;
-
- brw_miptree_prepare_access(brw, mt, start_level, num_levels,
- start_layer, num_layers,
- aux_usage, clear_supported);
-}
-
-void
-brw_miptree_prepare_image(struct brw_context *brw, struct brw_mipmap_tree *mt)
-{
- /* The data port doesn't understand any compression */
- brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
- 0, INTEL_REMAINING_LAYERS,
- ISL_AUX_USAGE_NONE, false);
-}
-
-enum isl_aux_usage
-brw_miptree_render_aux_usage(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format render_format,
- bool blend_enabled,
- bool draw_aux_disabled)
-{
- struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (draw_aux_disabled)
- return ISL_AUX_USAGE_NONE;
-
- switch (mt->aux_usage) {
- case ISL_AUX_USAGE_MCS:
- assert(mt->aux_buf);
- return ISL_AUX_USAGE_MCS;
-
- case ISL_AUX_USAGE_CCS_D:
- case ISL_AUX_USAGE_CCS_E:
- if (!mt->aux_buf) {
- assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D);
- return ISL_AUX_USAGE_NONE;
- }
-
- /* gfx9+ hardware technically supports non-0/1 clear colors with sRGB
- * formats. However, there are issues with blending where it doesn't
- * properly apply the sRGB curve to the clear color when blending.
- */
- if (devinfo->ver >= 9 && blend_enabled &&
- isl_format_is_srgb(render_format) &&
- !isl_color_value_is_zero_one(mt->fast_clear_color, render_format))
- return ISL_AUX_USAGE_NONE;
-
- if (mt->aux_usage == ISL_AUX_USAGE_CCS_E &&
- format_ccs_e_compat_with_miptree(&brw->screen->devinfo,
- mt, render_format))
- return ISL_AUX_USAGE_CCS_E;
-
- /* Otherwise, we have to fall back to CCS_D */
- return ISL_AUX_USAGE_CCS_D;
-
- default:
- return ISL_AUX_USAGE_NONE;
- }
-}
-
-void
-brw_miptree_prepare_render(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count,
- enum isl_aux_usage aux_usage)
-{
- brw_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
- aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
-}
-
-void
-brw_miptree_finish_render(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count,
- enum isl_aux_usage aux_usage)
-{
- assert(_mesa_is_format_color_format(mt->format));
-
- brw_miptree_finish_write(brw, mt, level, start_layer, layer_count,
- aux_usage);
-}
-
-void
-brw_miptree_prepare_depth(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count)
-{
- brw_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
- mt->aux_usage, mt->aux_buf != NULL);
-}
-
-void
-brw_miptree_finish_depth(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count,
- bool depth_written)
-{
- if (depth_written) {
- brw_miptree_finish_write(brw, mt, level, start_layer, layer_count,
- mt->aux_usage);
- }
-}
-
-void
-brw_miptree_prepare_external(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
- bool supports_fast_clear = false;
-
- const struct isl_drm_modifier_info *mod_info =
- isl_drm_modifier_get_info(mt->drm_modifier);
-
- if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) {
- /* CCS_E is the only supported aux for external images and it's only
- * supported on very simple images.
- */
- assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E);
- assert(_mesa_is_format_color_format(mt->format));
- assert(mt->first_level == 0 && mt->last_level == 0);
- assert(mt->surf.logical_level0_px.depth == 1);
- assert(mt->surf.logical_level0_px.array_len == 1);
- assert(mt->surf.samples == 1);
- assert(mt->aux_buf != NULL);
-
- aux_usage = mod_info->aux_usage;
- supports_fast_clear = mod_info->supports_clear_color;
- }
-
- brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
- 0, INTEL_REMAINING_LAYERS,
- aux_usage, supports_fast_clear);
-}
-
-void
-brw_miptree_finish_external(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- if (!mt->aux_buf)
- return;
-
- /* We don't know the actual aux state of the aux surface. The previous
- * owner could have given it to us in a number of different states.
- * Because we don't know the aux state, we reset the aux state to the
- * least common denominator of possible valid states.
- */
- enum isl_aux_state default_aux_state =
- isl_drm_modifier_get_default_aux_state(mt->drm_modifier);
- assert(mt->last_level == mt->first_level);
- brw_miptree_set_aux_state(brw, mt, 0, 0, INTEL_REMAINING_LAYERS,
- default_aux_state);
-}
-
-/**
- * Make it possible to share the BO backing the given miptree with another
- * process or another miptree.
- *
- * Fast color clears are unsafe with shared buffers, so we need to resolve and
- * then discard the aux buffer, if present. We also clear the aux usage and
- * fast-clear support so that no aux buffer gets allocated in the future.
- *
- * HiZ is similarly unsafe with shared buffers.
- */
-void
-brw_miptree_make_shareable(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- /* MCS buffers are also used for multisample buffers, but we can't resolve
- * away a multisample MCS buffer because it's an integral part of how the
- * pixel data is stored. Fortunately this code path should never be
- * reached for multisample buffers.
- */
- assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE ||
- mt->surf.samples == 1);
-
- brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
- 0, INTEL_REMAINING_LAYERS,
- ISL_AUX_USAGE_NONE, false);
-
- if (mt->aux_buf) {
- brw_miptree_aux_buffer_free(mt->aux_buf);
- mt->aux_buf = NULL;
-
- /* Make future calls of brw_miptree_level_has_hiz() return false. */
- for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) {
- mt->level[l].has_hiz = false;
- }
-
- free(mt->aux_state);
- mt->aux_state = NULL;
- brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
- }
-
- mt->aux_usage = ISL_AUX_USAGE_NONE;
- mt->supports_fast_clear = false;
-}
-
-
-/**
- * \brief Get pointer offset into stencil buffer.
- *
- * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
- * must decode the tile's layout in software.
- *
- * See
- * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
- * Format.
- * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
- *
- * Even though the returned offset is always positive, the return type is
- * signed due to
- * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
- * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
- */
-static intptr_t
-brw_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
-{
- uint32_t tile_size = 4096;
- uint32_t tile_width = 64;
- uint32_t tile_height = 64;
- uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */
-
- uint32_t tile_x = x / tile_width;
- uint32_t tile_y = y / tile_height;
-
- /* The byte's address relative to the tile's base address. */
- uint32_t byte_x = x % tile_width;
- uint32_t byte_y = y % tile_height;
-
- uintptr_t u = tile_y * row_size
- + tile_x * tile_size
- + 512 * (byte_x / 8)
- + 64 * (byte_y / 8)
- + 32 * ((byte_y / 4) % 2)
- + 16 * ((byte_x / 4) % 2)
- + 8 * ((byte_y / 2) % 2)
- + 4 * ((byte_x / 2) % 2)
- + 2 * (byte_y % 2)
- + 1 * (byte_x % 2);
-
- if (swizzled) {
- /* adjust for bit6 swizzling */
- if (((byte_x / 8) % 2) == 1) {
- if (((byte_y / 8) % 2) == 0) {
- u += 64;
- } else {
- u -= 64;
- }
- }
- }
-
- return u;
-}
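
As a sanity check of the W-tile swizzle above, the small self-contained program below re-derives the same address math (bit-6 swizzling omitted, a 128-byte stride assumed for illustration) and verifies a few expected properties.

#include <assert.h>
#include <stdint.h>

static uintptr_t
w_tile_offset(uint32_t stride, uint32_t x, uint32_t y)
{
   const uint32_t tile_size = 4096, tile_w = 64, tile_h = 64;
   const uint32_t row_size = 64 * stride / 2;   /* two rows interleaved */
   const uint32_t bx = x % tile_w, by = y % tile_h;

   return (y / tile_h) * row_size + (x / tile_w) * tile_size +
          512 * (bx / 8) + 64 * (by / 8) +
          32 * ((by / 4) % 2) + 16 * ((bx / 4) % 2) +
          8 * ((by / 2) % 2) + 4 * ((bx / 2) % 2) +
          2 * (by % 2) + (bx % 2);
}

int main(void)
{
   assert(w_tile_offset(128, 0, 0) == 0);
   assert(w_tile_offset(128, 1, 0) == 1);     /* x+1 flips bit 0 */
   assert(w_tile_offset(128, 0, 1) == 2);     /* y+1 flips bit 1 */
   assert(w_tile_offset(128, 63, 63) < 4096); /* stays inside one 4 KB tile */
   return 0;
}
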
-
-void
-brw_miptree_updownsample(struct brw_context *brw,
- struct brw_mipmap_tree *src,
- struct brw_mipmap_tree *dst)
-{
- unsigned src_w = src->surf.logical_level0_px.width;
- unsigned src_h = src->surf.logical_level0_px.height;
- unsigned dst_w = dst->surf.logical_level0_px.width;
- unsigned dst_h = dst->surf.logical_level0_px.height;
-
- brw_blorp_blit_miptrees(brw,
- src, 0 /* level */, 0 /* layer */,
- src->format, SWIZZLE_XYZW,
- dst, 0 /* level */, 0 /* layer */, dst->format,
- 0, 0, src_w, src_h,
- 0, 0, dst_w, dst_h,
- GL_NEAREST, false, false /*mirror x, y*/,
- false, false);
-
- if (src->stencil_mt) {
- src_w = src->stencil_mt->surf.logical_level0_px.width;
- src_h = src->stencil_mt->surf.logical_level0_px.height;
- dst_w = dst->stencil_mt->surf.logical_level0_px.width;
- dst_h = dst->stencil_mt->surf.logical_level0_px.height;
-
- brw_blorp_blit_miptrees(brw,
- src->stencil_mt, 0 /* level */, 0 /* layer */,
- src->stencil_mt->format, SWIZZLE_XYZW,
- dst->stencil_mt, 0 /* level */, 0 /* layer */,
- dst->stencil_mt->format,
- 0, 0, src_w, src_h,
- 0, 0, dst_w, dst_h,
- GL_NEAREST, false, false /*mirror x, y*/,
- false, false /* decode/encode srgb */);
- }
-}
-
-void
-brw_update_r8stencil(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 7);
- struct brw_mipmap_tree *src =
- mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt;
- if (!src || devinfo->ver >= 8)
- return;
-
- assert(src->surf.size_B > 0);
-
- if (!mt->shadow_mt) {
- assert(devinfo->ver > 6); /* Handle MIPTREE_LAYOUT_GFX6_HIZ_STENCIL */
- mt->shadow_mt = make_surface(
- brw,
- src->target,
- MESA_FORMAT_R_UINT8,
- src->first_level, src->last_level,
- src->surf.logical_level0_px.width,
- src->surf.logical_level0_px.height,
- src->surf.dim == ISL_SURF_DIM_3D ?
- src->surf.logical_level0_px.depth :
- src->surf.logical_level0_px.array_len,
- src->surf.samples,
- ISL_TILING_Y0_BIT,
- ISL_SURF_USAGE_TEXTURE_BIT,
- BO_ALLOC_BUSY, 0, NULL);
- assert(mt->shadow_mt);
- }
-
- if (src->shadow_needs_update == false)
- return;
-
- struct brw_mipmap_tree *dst = mt->shadow_mt;
-
- for (int level = src->first_level; level <= src->last_level; level++) {
- const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ?
- minify(src->surf.phys_level0_sa.depth, level) :
- src->surf.phys_level0_sa.array_len;
-
- for (unsigned layer = 0; layer < depth; layer++) {
- brw_blorp_copy_miptrees(brw,
- src, level, layer,
- dst, level, layer,
- 0, 0, 0, 0,
- minify(src->surf.logical_level0_px.width,
- level),
- minify(src->surf.logical_level0_px.height,
- level));
- }
- }
-
- brw_cache_flush_for_read(brw, dst->bo);
- src->shadow_needs_update = false;
-}
-
-static void *
-brw_miptree_map_raw(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- GLbitfield mode)
-{
- struct brw_bo *bo = mt->bo;
-
- if (brw_batch_references(&brw->batch, bo))
- brw_batch_flush(brw);
-
- return brw_bo_map(brw, bo, mode);
-}
-
-static void
-brw_miptree_unmap_raw(struct brw_mipmap_tree *mt)
-{
- brw_bo_unmap(mt->bo);
-}
-
-static void
-brw_miptree_unmap_map(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- brw_miptree_unmap_raw(mt);
-}
-
-static void
-brw_miptree_map_map(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- unsigned int bw, bh;
- void *base;
- unsigned int image_x, image_y;
- intptr_t x = map->x;
- intptr_t y = map->y;
-
- /* For compressed formats, the stride is the number of bytes per
- * row of blocks. brw_miptree_get_image_offset() already does
- * the divide.
- */
- _mesa_get_format_block_size(mt->format, &bw, &bh);
- assert(y % bh == 0);
- assert(x % bw == 0);
- y /= bh;
- x /= bw;
-
- brw_miptree_access_raw(brw, mt, level, slice,
- map->mode & GL_MAP_WRITE_BIT);
-
- base = brw_miptree_map_raw(brw, mt, map->mode);
-
- if (base == NULL)
- map->ptr = NULL;
- else {
- base += mt->offset;
-
- /* Note that in the case of cube maps, the caller must have passed the
- * slice number referencing the face.
- */
- brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
- x += image_x;
- y += image_y;
-
- map->stride = mt->surf.row_pitch_B;
- map->ptr = base + y * map->stride + x * mt->cpp;
- }
-
- DBG("%s: %d,%d %dx%d from mt %p (%s) "
- "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
- map->x, map->y, map->w, map->h,
- mt, _mesa_get_format_name(mt->format),
- x, y, map->ptr, map->stride);
-
- map->unmap = brw_miptree_unmap_map;
-}
-
-static void
-brw_miptree_unmap_blit(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level,
- unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
-
- brw_miptree_unmap_raw(map->linear_mt);
-
- if (map->mode & GL_MAP_WRITE_BIT) {
- if (devinfo->ver >= 6) {
- brw_blorp_copy_miptrees(brw, map->linear_mt, 0, 0,
- mt, level, slice,
- 0, 0, map->x, map->y, map->w, map->h);
- } else {
- bool ok = brw_miptree_copy(brw,
- map->linear_mt, 0, 0, 0, 0,
- mt, level, slice, map->x, map->y,
- map->w, map->h);
- WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
- }
- }
-
- brw_miptree_release(&map->linear_mt);
-}
-
-/* Compute extent parameters for use with tiled_memcpy functions.
- * xs are in units of bytes and ys are in units of rows of blocks.
- */
-static inline void
-tile_extents(struct brw_mipmap_tree *mt, struct brw_miptree_map *map,
- unsigned int level, unsigned int slice, unsigned int *x1_B,
- unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
-{
- unsigned int block_width, block_height;
- unsigned int x0_el, y0_el;
-
- _mesa_get_format_block_size(mt->format, &block_width, &block_height);
-
- assert(map->x % block_width == 0);
- assert(map->y % block_height == 0);
-
- brw_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
- *x1_B = (map->x / block_width + x0_el) * mt->cpp;
- *y1_el = map->y / block_height + y0_el;
- *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
- *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
-}
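
A worked example of the extent computation above, assuming a hypothetical 4x4-block compressed format with 16 bytes per block, a 16x16-texel map region at texel (8, 4), and an image origin of (0, 0):

#include <assert.h>

int main(void)
{
   /* Hypothetical compressed format: 4x4 blocks, 16 bytes per block. */
   const unsigned bw = 4, bh = 4, cpp = 16;
   /* Map a 16x16 texel region at texel (8, 4); image origin (0, 0). */
   const unsigned x = 8, y = 4, w = 16, h = 16, x0_el = 0, y0_el = 0;

   unsigned x1_B  = (x / bw + x0_el) * cpp;
   unsigned y1_el = y / bh + y0_el;
   unsigned x2_B  = ((x + w + bw - 1) / bw + x0_el) * cpp;   /* DIV_ROUND_UP */
   unsigned y2_el = (y + h + bh - 1) / bh + y0_el;

   assert(x1_B == 32 && x2_B == 96);   /* byte range within a block row */
   assert(y1_el == 1 && y2_el == 5);   /* block-row range               */
   return 0;
}
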
-
-static void
-brw_miptree_unmap_tiled_memcpy(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level,
- unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (map->mode & GL_MAP_WRITE_BIT) {
- unsigned int x1, x2, y1, y2;
- tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
-
- char *dst = brw_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
- dst += mt->offset;
-
- isl_memcpy_linear_to_tiled(
- x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, map->stride,
- devinfo->has_bit6_swizzle, mt->surf.tiling, ISL_MEMCPY);
-
- brw_miptree_unmap_raw(mt);
- }
- align_free(map->buffer);
- map->buffer = map->ptr = NULL;
-}
-
-/**
- * Determine which copy function to use for the given format combination
- *
- * The only two possible copy functions which are ever returned are a
- * direct memcpy and an RGBA <-> BGRA copy function. Since RGBA -> BGRA and
- * BGRA -> RGBA are exactly the same operation (and memcpy is obviously
- * symmetric), it doesn't matter whether the copy is from the tiled image
- * to the untiled one or vice versa. The required copy function is the same
- * in either case, so this function can be used for both directions.
- *
- * \param[in] tiledFormat The format of the tiled image
- * \param[in] format The GL format of the client data
- * \param[in] type The GL type of the client data
- * \param[out] mem_copy Will be set to one of either the standard
- * library's memcpy or a different copy function
- * that performs an RGBA to BGRA conversion
- * \param[out] cpp Number of bytes per pixel
- *
- * \return true if the format and type combination are valid
- */
-isl_memcpy_type
-brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type,
- uint32_t *cpp)
-{
- if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
- !(format == GL_RGBA || format == GL_BGRA))
- return ISL_MEMCPY_INVALID; /* Invalid type/format combination */
-
- if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) ||
- (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) {
- *cpp = 1;
- return ISL_MEMCPY;
- } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) ||
- (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) ||
- (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) ||
- (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) {
- *cpp = 4;
- if (format == GL_BGRA) {
- return ISL_MEMCPY;
- } else if (format == GL_RGBA) {
- return ISL_MEMCPY_BGRA8;
- }
- } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
- (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) ||
- (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) ||
- (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) {
- *cpp = 4;
- if (format == GL_BGRA) {
- /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
- * use the same function.
- */
- return ISL_MEMCPY_BGRA8;
- } else if (format == GL_RGBA) {
- return ISL_MEMCPY;
- }
- }
-
- return ISL_MEMCPY_INVALID;
-}
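
One copy function covers both directions because the RGBA <-> BGRA conversion is its own inverse. The sketch below illustrates that; copy_swap_rb is a hypothetical stand-in for the ISL_MEMCPY_BGRA8 path, not the real implementation.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Swap byte 0 and byte 2 of each 4-byte pixel; applying it twice is a no-op. */
static void copy_swap_rb(uint8_t *dst, const uint8_t *src, size_t pixels)
{
   for (size_t i = 0; i < pixels; i++) {
      dst[4 * i + 0] = src[4 * i + 2];
      dst[4 * i + 1] = src[4 * i + 1];
      dst[4 * i + 2] = src[4 * i + 0];
      dst[4 * i + 3] = src[4 * i + 3];
   }
}

int main(void)
{
   const uint8_t rgba[4] = { 0x11, 0x22, 0x33, 0x44 };
   uint8_t bgra[4], back[4];

   copy_swap_rb(bgra, rgba, 1);
   copy_swap_rb(back, bgra, 1);           /* applying it twice round-trips */
   assert(memcmp(back, rgba, 4) == 0);
   assert(bgra[0] == 0x33 && bgra[2] == 0x11);
   return 0;
}
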
-
-static void
-brw_miptree_map_tiled_memcpy(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- brw_miptree_access_raw(brw, mt, level, slice,
- map->mode & GL_MAP_WRITE_BIT);
-
- unsigned int x1, x2, y1, y2;
- tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
- map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
-
- /* The tiling and detiling functions require that the linear buffer
- * has proper 16-byte alignment (that is, its `x0` is 16-byte
- * aligned). Here we over-allocate the linear buffer by enough
- * bytes to get the proper alignment.
- */
- map->buffer = align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16);
- map->ptr = (char *)map->buffer + (x1 & 0xf);
- assert(map->buffer);
-
- if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
- char *src = brw_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
- src += mt->offset;
-
- const isl_memcpy_type copy_type =
-#if defined(USE_SSE41)
- cpu_has_sse4_1 ? ISL_MEMCPY_STREAMING_LOAD :
-#endif
- ISL_MEMCPY;
-
- isl_memcpy_tiled_to_linear(
- x1, x2, y1, y2, map->ptr, src, map->stride,
- mt->surf.row_pitch_B, devinfo->has_bit6_swizzle, mt->surf.tiling,
- copy_type);
-
- brw_miptree_unmap_raw(mt);
- }
-
- map->unmap = brw_miptree_unmap_tiled_memcpy;
-}
-
-static void
-brw_miptree_map_blit(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- map->linear_mt = make_surface(brw, GL_TEXTURE_2D, mt->format,
- 0, 0, map->w, map->h, 1, 1,
- ISL_TILING_LINEAR_BIT,
- ISL_SURF_USAGE_RENDER_TARGET_BIT |
- ISL_SURF_USAGE_TEXTURE_BIT,
- 0, 0, NULL);
-
- if (!map->linear_mt) {
- fprintf(stderr, "Failed to allocate blit temporary\n");
- goto fail;
- }
- map->stride = map->linear_mt->surf.row_pitch_B;
-
- /* At least one of READ_BIT or WRITE_BIT is set. READ_BIT implies no
- * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
- * invalidate is set, since we'll be writing the whole rectangle from our
- * temporary buffer back out.
- */
- if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
- if (devinfo->ver >= 6) {
- brw_blorp_copy_miptrees(brw, mt, level, slice,
- map->linear_mt, 0, 0,
- map->x, map->y, 0, 0, map->w, map->h);
- } else {
- if (!brw_miptree_copy(brw,
- mt, level, slice, map->x, map->y,
- map->linear_mt, 0, 0, 0, 0,
- map->w, map->h)) {
- fprintf(stderr, "Failed to blit\n");
- goto fail;
- }
- }
- }
-
- map->ptr = brw_miptree_map_raw(brw, map->linear_mt, map->mode);
-
- DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
- map->x, map->y, map->w, map->h,
- mt, _mesa_get_format_name(mt->format),
- level, slice, map->ptr, map->stride);
-
- map->unmap = brw_miptree_unmap_blit;
- return;
-
-fail:
- brw_miptree_release(&map->linear_mt);
- map->ptr = NULL;
- map->stride = 0;
-}
-
-/**
- * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
- */
-#if defined(USE_SSE41)
-static void
-brw_miptree_unmap_movntdqa(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level,
- unsigned int slice)
-{
- align_free(map->buffer);
- map->buffer = NULL;
- map->ptr = NULL;
-}
-
-static void
-brw_miptree_map_movntdqa(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- assert(map->mode & GL_MAP_READ_BIT);
- assert(!(map->mode & GL_MAP_WRITE_BIT));
-
- brw_miptree_access_raw(brw, mt, level, slice, false);
-
- DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
- map->x, map->y, map->w, map->h,
- mt, _mesa_get_format_name(mt->format),
- level, slice, map->ptr, map->stride);
-
- /* Map the original image */
- uint32_t image_x;
- uint32_t image_y;
- brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
- image_x += map->x;
- image_y += map->y;
-
- void *src = brw_miptree_map_raw(brw, mt, map->mode);
- if (!src)
- return;
-
- src += mt->offset;
-
- src += image_y * mt->surf.row_pitch_B;
- src += image_x * mt->cpp;
-
- /* Due to the pixel offsets for the particular image being mapped, our
- * src pointer may not be 16-byte aligned. However, if the pitch is
- * divisible by 16, then the amount by which it's misaligned will remain
- * consistent from row to row.
- */
- assert((mt->surf.row_pitch_B % 16) == 0);
- const int misalignment = ((uintptr_t) src) & 15;
-
- /* Create an untiled temporary buffer for the mapping. */
- const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
-
- map->stride = ALIGN(misalignment + width_bytes, 16);
-
- map->buffer = align_malloc(map->stride * map->h, 16);
- /* Offset the destination so it has the same misalignment as src. */
- map->ptr = map->buffer + misalignment;
-
- assert((((uintptr_t) map->ptr) & 15) == misalignment);
-
- for (uint32_t y = 0; y < map->h; y++) {
- void *dst_ptr = map->ptr + y * map->stride;
- void *src_ptr = src + y * mt->surf.row_pitch_B;
-
- _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
- }
-
- brw_miptree_unmap_raw(mt);
-
- map->unmap = brw_miptree_unmap_movntdqa;
-}
-#endif
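
The MOVNTDQA path above relies on giving the temporary buffer the same misalignment as the source, so each 16-byte streaming load stays aligned from row to row. A minimal sketch of that allocation trick follows; fake_src is a made-up address used purely for illustration.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
   const uintptr_t fake_src = 0x1000 + 5;         /* pretend source address */
   const unsigned width_bytes = 100;
   const int misalignment = fake_src & 15;        /* 5 bytes off alignment  */
   const unsigned stride = (misalignment + width_bytes + 15) & ~15u;

   /* 16-byte-aligned allocation, large enough for 4 rows of 'stride'. */
   void *buffer = aligned_alloc(16, stride * 4);
   uint8_t *ptr = (uint8_t *)buffer + misalignment;

   assert(((uintptr_t)ptr & 15) == (uintptr_t)misalignment); /* matches src */
   assert(stride % 16 == 0);                                 /* aligned rows */
   free(buffer);
   return 0;
}
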
-
-static void
-brw_miptree_unmap_s8(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level,
- unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (map->mode & GL_MAP_WRITE_BIT) {
- unsigned int image_x, image_y;
- uint8_t *untiled_s8_map = map->ptr;
- uint8_t *tiled_s8_map = brw_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
-
- brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
-
- for (uint32_t y = 0; y < map->h; y++) {
- for (uint32_t x = 0; x < map->w; x++) {
- ptrdiff_t offset = brw_offset_S8(mt->surf.row_pitch_B,
- image_x + x + map->x,
- image_y + y + map->y,
- devinfo->has_bit6_swizzle);
- tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
- }
- }
-
- brw_miptree_unmap_raw(mt);
- }
-
- free(map->buffer);
-}
-
-static void
-brw_miptree_map_s8(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- map->stride = map->w;
- map->buffer = map->ptr = malloc(map->stride * map->h);
- if (!map->buffer)
- return;
-
- brw_miptree_access_raw(brw, mt, level, slice,
- map->mode & GL_MAP_WRITE_BIT);
-
- /* At least one of READ_BIT or WRITE_BIT is set. READ_BIT implies no
- * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
- * invalidate is set, since we'll be writing the whole rectangle from our
- * temporary buffer back out.
- */
- if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
- uint8_t *untiled_s8_map = map->ptr;
- uint8_t *tiled_s8_map = brw_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
- unsigned int image_x, image_y;
-
- brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
-
- for (uint32_t y = 0; y < map->h; y++) {
- for (uint32_t x = 0; x < map->w; x++) {
- ptrdiff_t offset = brw_offset_S8(mt->surf.row_pitch_B,
- x + image_x + map->x,
- y + image_y + map->y,
- devinfo->has_bit6_swizzle);
- untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
- }
- }
-
- brw_miptree_unmap_raw(mt);
-
- DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
- map->x, map->y, map->w, map->h,
- mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
- } else {
- DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
- map->x, map->y, map->w, map->h,
- mt, map->ptr, map->stride);
- }
-
- map->unmap = brw_miptree_unmap_s8;
-}
-
-/**
- * Mapping functions for packed depth/stencil miptrees backed by real separate
- * miptrees for depth and stencil.
- *
- * On gfx7, and to support HiZ pre-gfx7, we have to have the stencil buffer
- * separate from the depth buffer. Yet at the GL API level, we have to expose
- * packed depth/stencil textures and FBO attachments, and Mesa core expects to
- * be able to map that memory for texture storage and glReadPixels-type
- * operations. We give Mesa core that access by mallocing a temporary and
- * copying the data between the actual backing store and the temporary.
- */
-static void
-brw_miptree_unmap_depthstencil(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level,
- unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_mipmap_tree *z_mt = mt;
- struct brw_mipmap_tree *s_mt = mt->stencil_mt;
- bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
-
- if (map->mode & GL_MAP_WRITE_BIT) {
- uint32_t *packed_map = map->ptr;
- uint8_t *s_map = brw_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
- uint32_t *z_map = brw_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
- unsigned int s_image_x, s_image_y;
- unsigned int z_image_x, z_image_y;
-
- brw_miptree_get_image_offset(s_mt, level, slice,
- &s_image_x, &s_image_y);
- brw_miptree_get_image_offset(z_mt, level, slice,
- &z_image_x, &z_image_y);
-
- for (uint32_t y = 0; y < map->h; y++) {
- for (uint32_t x = 0; x < map->w; x++) {
- ptrdiff_t s_offset = brw_offset_S8(s_mt->surf.row_pitch_B,
- x + s_image_x + map->x,
- y + s_image_y + map->y,
- devinfo->has_bit6_swizzle);
- ptrdiff_t z_offset = ((y + z_image_y + map->y) *
- (z_mt->surf.row_pitch_B / 4) +
- (x + z_image_x + map->x));
-
- if (map_z32f_x24s8) {
- z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
- s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
- } else {
- uint32_t packed = packed_map[y * map->w + x];
- s_map[s_offset] = packed >> 24;
- z_map[z_offset] = packed;
- }
- }
- }
-
- brw_miptree_unmap_raw(s_mt);
- brw_miptree_unmap_raw(z_mt);
-
- DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
- __func__,
- map->x, map->y, map->w, map->h,
- z_mt, _mesa_get_format_name(z_mt->format),
- map->x + z_image_x, map->y + z_image_y,
- s_mt, map->x + s_image_x, map->y + s_image_y,
- map->ptr, map->stride);
- }
-
- free(map->buffer);
-}
-
-static void
-brw_miptree_map_depthstencil(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level, unsigned int slice)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_mipmap_tree *z_mt = mt;
- struct brw_mipmap_tree *s_mt = mt->stencil_mt;
- bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
- int packed_bpp = map_z32f_x24s8 ? 8 : 4;
-
- map->stride = map->w * packed_bpp;
- map->buffer = map->ptr = malloc(map->stride * map->h);
- if (!map->buffer)
- return;
-
- brw_miptree_access_raw(brw, z_mt, level, slice,
- map->mode & GL_MAP_WRITE_BIT);
- brw_miptree_access_raw(brw, s_mt, level, slice,
- map->mode & GL_MAP_WRITE_BIT);
-
- /* At least one of READ_BIT or WRITE_BIT is set. READ_BIT implies no
- * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
- * invalidate is set, since we'll be writing the whole rectangle from our
- * temporary buffer back out.
- */
- if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
- uint32_t *packed_map = map->ptr;
- uint8_t *s_map = brw_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
- uint32_t *z_map = brw_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
- unsigned int s_image_x, s_image_y;
- unsigned int z_image_x, z_image_y;
-
- brw_miptree_get_image_offset(s_mt, level, slice,
- &s_image_x, &s_image_y);
- brw_miptree_get_image_offset(z_mt, level, slice,
- &z_image_x, &z_image_y);
-
- for (uint32_t y = 0; y < map->h; y++) {
- for (uint32_t x = 0; x < map->w; x++) {
- int map_x = map->x + x, map_y = map->y + y;
- ptrdiff_t s_offset = brw_offset_S8(s_mt->surf.row_pitch_B,
- map_x + s_image_x,
- map_y + s_image_y,
- devinfo->has_bit6_swizzle);
- ptrdiff_t z_offset = ((map_y + z_image_y) *
- (z_mt->surf.row_pitch_B / 4) +
- (map_x + z_image_x));
- uint8_t s = s_map[s_offset];
- uint32_t z = z_map[z_offset];
-
- if (map_z32f_x24s8) {
- packed_map[(y * map->w + x) * 2 + 0] = z;
- packed_map[(y * map->w + x) * 2 + 1] = s;
- } else {
- packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
- }
- }
- }
-
- brw_miptree_unmap_raw(s_mt);
- brw_miptree_unmap_raw(z_mt);
-
- DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
- __func__,
- map->x, map->y, map->w, map->h,
- z_mt, map->x + z_image_x, map->y + z_image_y,
- s_mt, map->x + s_image_x, map->y + s_image_y,
- map->ptr, map->stride);
- } else {
- DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
- map->x, map->y, map->w, map->h,
- mt, map->ptr, map->stride);
- }
-
- map->unmap = brw_miptree_unmap_depthstencil;
-}
-
-/**
- * Create and attach a map to the miptree at (level, slice). Return the
- * attached map.
- */
-static struct brw_miptree_map*
-brw_miptree_attach_map(struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice,
- unsigned int x,
- unsigned int y,
- unsigned int w,
- unsigned int h,
- GLbitfield mode)
-{
- struct brw_miptree_map *map = calloc(1, sizeof(*map));
-
- if (!map)
- return NULL;
-
- assert(mt->level[level].slice[slice].map == NULL);
- mt->level[level].slice[slice].map = map;
-
- map->mode = mode;
- map->x = x;
- map->y = y;
- map->w = w;
- map->h = h;
-
- return map;
-}
-
-/**
- * Release the map at (level, slice).
- */
-static void
-brw_miptree_release_map(struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice)
-{
- struct brw_miptree_map **map;
-
- map = &mt->level[level].slice[slice].map;
- free(*map);
- *map = NULL;
-}
-
-static bool
-can_blit_slice(struct brw_mipmap_tree *mt,
- const struct brw_miptree_map *map)
-{
- /* See brw_miptree_blit() for details on the 32k pitch limit. */
- const unsigned src_blt_pitch = brw_miptree_blt_pitch(mt);
- const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64);
- return src_blt_pitch < 32768 && dst_blt_pitch < 32768;
-}
-
-static bool
-use_blitter_to_map(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- const struct brw_miptree_map *map)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->has_llc &&
- /* It's probably not worth swapping to the blit ring because of
- * all the overhead involved.
- */
- !(map->mode & GL_MAP_WRITE_BIT) &&
- !mt->compressed &&
- (mt->surf.tiling == ISL_TILING_X ||
- /* Prior to Sandybridge, the blitter can't handle Y tiling */
- (devinfo->ver >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
- /* Fast copy blit on skl+ supports all tiling formats. */
- devinfo->ver >= 9) &&
- can_blit_slice(mt, map))
- return true;
-
- if (mt->surf.tiling != ISL_TILING_LINEAR &&
- mt->bo->size >= brw->max_gtt_map_object_size) {
- assert(can_blit_slice(mt, map));
- return true;
- }
-
- return false;
-}
-
-/**
- * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
- * exceed 32 bits, but to diminish the likelihood of subtle bugs caused by
- * overflow in pointer arithmetic.
- *
- * If you call this function and use \a out_stride, then you're doing pointer
- * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
- * bugs. The caller must still take care to avoid 32-bit overflow errors in
- * all arithmetic expressions that contain buffer offsets and pixel sizes,
- * which usually have type uint32_t or GLuint.
- */
-void
-brw_miptree_map(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice,
- unsigned int x,
- unsigned int y,
- unsigned int w,
- unsigned int h,
- GLbitfield mode,
- void **out_ptr,
- ptrdiff_t *out_stride)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_miptree_map *map;
-
- assert(mt->surf.samples == 1);
-
- map = brw_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
- if (!map) {
- *out_ptr = NULL;
- *out_stride = 0;
- return;
- }
-
- if (mt->format == MESA_FORMAT_S_UINT8) {
- brw_miptree_map_s8(brw, mt, map, level, slice);
- } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
- brw_miptree_map_depthstencil(brw, mt, map, level, slice);
- } else if (use_blitter_to_map(brw, mt, map)) {
- brw_miptree_map_blit(brw, mt, map, level, slice);
- } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->ver > 4) {
- brw_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
-#if defined(USE_SSE41)
- } else if (!(mode & GL_MAP_WRITE_BIT) &&
- !mt->compressed && cpu_has_sse4_1 &&
- (mt->surf.row_pitch_B % 16 == 0)) {
- brw_miptree_map_movntdqa(brw, mt, map, level, slice);
-#endif
- } else {
- if (mt->surf.tiling != ISL_TILING_LINEAR)
- perf_debug("brw_miptree_map: mapping via gtt");
- brw_miptree_map_map(brw, mt, map, level, slice);
- }
-
- *out_ptr = map->ptr;
- *out_stride = map->stride;
-
- if (map->ptr == NULL)
- brw_miptree_release_map(mt, level, slice);
-}
-
-void
-brw_miptree_unmap(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice)
-{
- struct brw_miptree_map *map = mt->level[level].slice[slice].map;
-
- assert(mt->surf.samples == 1);
-
- if (!map)
- return;
-
- DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
- mt, _mesa_get_format_name(mt->format), level, slice);
-
- if (map->unmap)
- map->unmap(brw, mt, map, level, slice);
-
- brw_miptree_release_map(mt, level, slice);
-}
-
-enum isl_surf_dim
-get_isl_surf_dim(GLenum target)
-{
- switch (target) {
- case GL_TEXTURE_1D:
- case GL_TEXTURE_1D_ARRAY:
- return ISL_SURF_DIM_1D;
-
- case GL_TEXTURE_2D:
- case GL_TEXTURE_2D_ARRAY:
- case GL_TEXTURE_RECTANGLE:
- case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_ARRAY:
- case GL_TEXTURE_2D_MULTISAMPLE:
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- case GL_TEXTURE_EXTERNAL_OES:
- return ISL_SURF_DIM_2D;
-
- case GL_TEXTURE_3D:
- return ISL_SURF_DIM_3D;
- }
-
- unreachable("Invalid texture target");
-}
-
-enum isl_dim_layout
-get_isl_dim_layout(const struct intel_device_info *devinfo,
- enum isl_tiling tiling, GLenum target)
-{
- switch (target) {
- case GL_TEXTURE_1D:
- case GL_TEXTURE_1D_ARRAY:
- return (devinfo->ver >= 9 && tiling == ISL_TILING_LINEAR ?
- ISL_DIM_LAYOUT_GFX9_1D : ISL_DIM_LAYOUT_GFX4_2D);
-
- case GL_TEXTURE_2D:
- case GL_TEXTURE_2D_ARRAY:
- case GL_TEXTURE_RECTANGLE:
- case GL_TEXTURE_2D_MULTISAMPLE:
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- case GL_TEXTURE_EXTERNAL_OES:
- return ISL_DIM_LAYOUT_GFX4_2D;
-
- case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_ARRAY:
- return (devinfo->ver == 4 ? ISL_DIM_LAYOUT_GFX4_3D :
- ISL_DIM_LAYOUT_GFX4_2D);
-
- case GL_TEXTURE_3D:
- return (devinfo->ver >= 9 ?
- ISL_DIM_LAYOUT_GFX4_2D : ISL_DIM_LAYOUT_GFX4_3D);
- }
-
- unreachable("Invalid texture target");
-}
-
-bool
-brw_miptree_set_clear_color(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- union isl_color_value clear_color)
-{
- if (memcmp(&mt->fast_clear_color, &clear_color, sizeof(clear_color)) != 0) {
- mt->fast_clear_color = clear_color;
- if (mt->aux_buf->clear_color_bo) {
- /* We can't update the clear color while the hardware is still using
- * the previous one for a resolve or sampling from it. Make sure that
- * there are no pending commands at this point.
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
- for (int i = 0; i < 4; i++) {
- brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo,
- mt->aux_buf->clear_color_offset + i * 4,
- mt->fast_clear_color.u32[i]);
- }
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
- }
- brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE;
- return true;
- }
- return false;
-}
-
-union isl_color_value
-brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt,
- struct brw_bo **clear_color_bo,
- uint64_t *clear_color_offset)
-{
- assert(mt->aux_buf);
-
- *clear_color_bo = mt->aux_buf->clear_color_bo;
- *clear_color_offset = mt->aux_buf->clear_color_offset;
- return mt->fast_clear_color;
-}
-
-static void
-brw_miptree_update_etc_shadow(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice,
- int level_w,
- int level_h)
-{
- ptrdiff_t etc_stride, shadow_stride;
- void *mptr, *sptr;
- struct brw_mipmap_tree *smt = mt->shadow_mt;
-
- assert(brw_miptree_has_etc_shadow(brw, mt));
-
- brw_miptree_map(brw, mt, level, slice, 0, 0, level_w, level_h,
- GL_MAP_READ_BIT, &mptr, &etc_stride);
- brw_miptree_map(brw, smt, level, slice, 0, 0, level_w, level_h,
- GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
- &sptr, &shadow_stride);
-
- if (mt->format == MESA_FORMAT_ETC1_RGB8) {
- _mesa_etc1_unpack_rgba8888(sptr, shadow_stride, mptr, etc_stride,
- level_w, level_h);
- } else {
- /* destination and source images must have the same swizzle */
- bool is_bgra = (smt->format == MESA_FORMAT_B8G8R8A8_SRGB);
- _mesa_unpack_etc2_format(sptr, shadow_stride, mptr, etc_stride,
- level_w, level_h, mt->format, is_bgra);
- }
-
- brw_miptree_unmap(brw, mt, level, slice);
- brw_miptree_unmap(brw, smt, level, slice);
-}
-
-void
-brw_miptree_update_etc_shadow_levels(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- struct brw_mipmap_tree *smt;
- int num_slices;
-
- assert(mt);
- assert(mt->surf.size_B > 0);
- assert(brw_miptree_has_etc_shadow(brw, mt));
-
- smt = mt->shadow_mt;
- num_slices = smt->surf.logical_level0_px.array_len;
-
- for (int level = smt->first_level; level <= smt->last_level; level++) {
- int level_w = minify(smt->surf.logical_level0_px.width,
- level - smt->first_level);
- int level_h = minify(smt->surf.logical_level0_px.height,
- level - smt->first_level);
-
- for (unsigned int slice = 0; slice < num_slices; slice++) {
- brw_miptree_update_etc_shadow(brw, mt, level, slice, level_w,
- level_h);
- }
- }
-
- mt->shadow_needs_update = false;
-}
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/** @file brw_mipmap_tree.h
- *
- * This file defines the structure that wraps a BO and describes how the
- * mipmap levels and slices of a texture are laid out.
- *
- * The hardware has a fixed layout of a texture depending on parameters such
- * as the target/type (2D, 3D, CUBE), width, height, pitch, and number of
- * mipmap levels. The individual level/layer slices are each 2D rectangles of
- * pixels at some x/y offset from the start of the brw_bo.
- *
- * Original OpenGL allowed texture miplevels to be specified in arbitrary
- * order, and a texture may change size over time. Thus, each
- * brw_texture_image has a reference to a miptree that contains the pixel
- * data sized appropriately for it, which will later be referenced by/copied
- * to the brw_texture_object at draw time (brw_finalize_mipmap_tree()) so
- * that there's a single miptree for the complete texture.
- */
-
-#ifndef BRW_MIPMAP_TREE_H
-#define BRW_MIPMAP_TREE_H
-
-#include <assert.h>
-
-#include "main/mtypes.h"
-#include "isl/isl.h"
-#include "blorp/blorp.h"
-#include "brw_bufmgr.h"
-#include "brw_context.h"
-#include <GL/internal/dri_interface.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_context;
-struct brw_renderbuffer;
-
-struct brw_texture_image;
-
-/**
- * This bit extends the set of GL_MAP_*_BIT enums.
- *
- * When calling brw_miptree_map() on an ETC-transcoded-to-RGB miptree or a
- * depthstencil-split-to-separate-stencil miptree, we'll normally make a
- * temporary and recreate the kind of data requested by Mesa core, since we're
- * satisfying some glGetTexImage() request or something.
- *
- * However, occasionally you want to actually map the miptree's current data
- * without transcoding back. This flag to brw_miptree_map() gets you that.
- */
-#define BRW_MAP_DIRECT_BIT 0x80000000
-
-struct brw_miptree_map {
- /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
- GLbitfield mode;
- /** Region of interest for the map. */
- int x, y, w, h;
- /** Possibly malloced temporary buffer for the mapping. */
- void *buffer;
- /** Possible pointer to a temporary linear miptree for the mapping. */
- struct brw_mipmap_tree *linear_mt;
- /** Pointer to the start of (map_x, map_y) returned by the mapping. */
- void *ptr;
- /** Stride of the mapping. */
- int stride;
-
- void (*unmap)(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- struct brw_miptree_map *map,
- unsigned int level,
- unsigned int slice);
-};
-
-/**
- * Describes the location of each texture image within a miptree.
- */
-struct brw_mipmap_level
-{
- /** Offset to this miptree level, used in computing x_offset. */
- GLuint level_x;
- /** Offset to this miptree level, used in computing y_offset. */
- GLuint level_y;
-
- /**
- * \brief Is HiZ enabled for this level?
- *
- * If \c mt->level[l].has_hiz is set, then (1) the HiZ auxiliary buffer
- * (\c mt->aux_buf) has been allocated and (2) it holds valid HiZ data for
- * the slices in this level.
- */
- bool has_hiz;
-
- /**
- * \brief List of 2D images in this mipmap level.
- *
- * This may be a list of cube faces, array slices in 2D array texture, or
- * layers in a 3D texture. The list's length is \c depth.
- */
- struct brw_mipmap_slice {
- /**
- * Mapping information. Persistent for the duration of
- * brw_miptree_map/unmap on this slice.
- */
- struct brw_miptree_map *map;
- } *slice;
-};
-
-/**
- * Miptree aux buffer. These buffers are associated with a miptree, but the
- * format is managed by the hardware.
- *
- * For Gfx7+, we always give the hardware the start of the buffer, and let it
- * handle all accesses to the buffer. Therefore we don't need the full miptree
- * layout structure for this buffer.
- */
-struct brw_miptree_aux_buffer
-{
- struct isl_surf surf;
-
- /**
- * Buffer object containing the pixel data.
- *
- * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
- * @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress
- */
- struct brw_bo *bo;
-
- /**
- * Offset into bo where the surface starts.
- *
- * @see brw_miptree_aux_buffer::bo
- *
- * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
- * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
- * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
- * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
- */
- uint32_t offset;
-
- /**
- * Buffer object containing the indirect clear color.
- *
- * @see create_ccs_buf_for_image
- * @see RENDER_SURFACE_STATE.ClearValueAddress
- */
- struct brw_bo *clear_color_bo;
-
- /**
- * Offset into bo where the clear color can be found.
- *
- * @see create_ccs_buf_for_image
- * @see RENDER_SURFACE_STATE.ClearValueAddress
- */
- uint32_t clear_color_offset;
-};
-
-struct brw_mipmap_tree
-{
- struct isl_surf surf;
-
- /**
- * Buffer object containing the surface.
- *
- * @see brw_mipmap_tree::offset
- * @see RENDER_SURFACE_STATE.SurfaceBaseAddress
- * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
- * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
- * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
- * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
- */
- struct brw_bo *bo;
-
- /**
- * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc.
- *
- * @see RENDER_SURFACE_STATE.SurfaceType
- * @see RENDER_SURFACE_STATE.SurfaceArray
- * @see 3DSTATE_DEPTH_BUFFER.SurfaceType
- */
- GLenum target;
-
- /**
- * Generally, this is just the same as the gl_texture_image->TexFormat or
- * gl_renderbuffer->Format.
- *
- * However, for textures and renderbuffers with packed depth/stencil formats
- * on hardware where we want or need to use separate stencil, there will be
- * two miptrees for storing the data. If the depthstencil texture or rb is
- * MESA_FORMAT_Z32_FLOAT_S8X24_UINT, then mt->format will be
- * MESA_FORMAT_Z_FLOAT32, otherwise for MESA_FORMAT_Z24_UNORM_S8_UINT objects it will be
- * MESA_FORMAT_Z24_UNORM_X8_UINT.
- *
- * @see RENDER_SURFACE_STATE.SurfaceFormat
- * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat
- */
- mesa_format format;
-
- GLuint first_level;
- GLuint last_level;
-
- /** Bytes per pixel (or bytes per block if compressed) */
- GLuint cpp;
-
- bool compressed;
-
- /* Includes image offset tables: */
- struct brw_mipmap_level level[MAX_TEXTURE_LEVELS];
-
- /**
- * Offset into bo where the surface starts.
- *
- * @see brw_mipmap_tree::bo
- *
- * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
- * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
- * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
- * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
- */
- uint32_t offset;
-
- /**
- * \brief The type of auxiliary compression used by this miptree.
- *
- * This describes the type of auxiliary compression that is intended to be
- * used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that
- * auxiliary compression is permanently disabled. An aux usage other than
- * ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has actually
- * been allocated nor does it imply that auxiliary compression will always
- * be enabled for this surface. For instance, with CCS_D, we may allocate
- * the CCS on-the-fly and it may not be used for texturing if the miptree
- * is fully resolved.
- */
- enum isl_aux_usage aux_usage;
-
- /**
- * \brief Whether or not this miptree supports fast clears.
- */
- bool supports_fast_clear;
-
- /**
- * \brief Maps miptree slices to their current aux state
- *
- * This two-dimensional array is indexed as [level][layer] and stores an
- * aux state for each slice.
- */
- enum isl_aux_state **aux_state;
-
- /**
- * \brief Stencil miptree for depthstencil textures.
- *
- * This miptree is used for depthstencil textures and renderbuffers that
- * require separate stencil. It always has the true copy of the stencil
- * bits, regardless of mt->format.
- *
- * \see 3DSTATE_STENCIL_BUFFER
- * \see brw_miptree_map_depthstencil()
- * \see brw_miptree_unmap_depthstencil()
- */
- struct brw_mipmap_tree *stencil_mt;
-
- /**
- * \brief Shadow miptree for sampling when the main one isn't supported by HW.
- *
- * To workaround various sampler bugs and limitations, we blit the main
- * texture into a new texture that can be sampled.
- *
- * This miptree may be used for:
- * - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing.
- * - To store the decompressed ETC/EAC data in case we emulate the ETC
- * compression on Gen 7 or earlier GPUs.
- */
- struct brw_mipmap_tree *shadow_mt;
- bool shadow_needs_update;
-
- /**
- * \brief CCS, MCS, or HiZ auxiliary buffer.
- *
- * NULL if no auxiliary buffer is in use for this surface.
- *
- * For single-sampled color miptrees:
- * This buffer contains the Color Control Surface, which stores the
- * necessary information to implement lossless color compression (CCS_E)
- * and "fast color clear" (CCS_D) behaviour.
- *
- * For multi-sampled color miptrees:
- * This buffer contains the Multisample Control Surface, which stores the
- * necessary information to implement compressed MSAA
- * (INTEL_MSAA_FORMAT_CMS).
- *
- * For depth miptrees:
- * This buffer contains the Hierarchical Depth Buffer, which stores the
- * necessary information to implement lossless depth compression and fast
- * depth clear behavior.
- *
- * To determine if HiZ is enabled, do not check this pointer. Instead,
- * use brw_miptree_level_has_hiz().
- */
- struct brw_miptree_aux_buffer *aux_buf;
-
- /**
- * Planes 1 and 2 in case this is a planar surface.
- */
- struct brw_mipmap_tree *plane[2];
-
- /**
- * Fast clear color for this surface. For depth surfaces, the clear value
- * is stored as a float32 in the red component.
- */
- union isl_color_value fast_clear_color;
-
- /**
- * For external surfaces, this is the DRM format modifier that was used to
- * create or import the surface. For internal surfaces, this will always
- * be DRM_FORMAT_MOD_INVALID.
- */
- uint64_t drm_modifier;
-
- /* These are also refcounted:
- */
- GLuint refcount;
-};
-
-bool
-brw_miptree_alloc_aux(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-enum brw_miptree_create_flags {
- /** No miptree create flags */
- MIPTREE_CREATE_DEFAULT = 0,
-
- /** Miptree creation should try to allocate a currently busy BO
- *
- * This may be advantageous if we know the next thing to touch the BO will
- * be the GPU because the BO will likely already be in the GTT and maybe
- * even in some caches. If there is a chance that the next thing to touch
- * the miptree BO will be the CPU, this flag should not be set.
- */
- MIPTREE_CREATE_BUSY = 1 << 0,
-
- /** Create the miptree with auxiliary compression disabled
- *
- * This does not prevent the caller of brw_miptree_create from coming
- * along later and turning auxiliary compression back on but it does mean
- * that the miptree will be created with mt->aux_usage == NONE.
- */
- MIPTREE_CREATE_NO_AUX = 1 << 1,
-};
-
-struct brw_mipmap_tree *brw_miptree_create(struct brw_context *brw,
- GLenum target,
- mesa_format format,
- GLuint first_level,
- GLuint last_level,
- GLuint width0,
- GLuint height0,
- GLuint depth0,
- GLuint num_samples,
- enum brw_miptree_create_flags flags);
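/* Illustrative sketch only (not part of the original header): creating a
 * small 2D staging miptree with auxiliary compression disabled.  The format
 * and dimensions here are placeholder values chosen for the example.
 */
static inline struct brw_mipmap_tree *
example_create_staging_mt(struct brw_context *brw)
{
   /* MIPTREE_CREATE_NO_AUX means the miptree starts out with
    * mt->aux_usage == ISL_AUX_USAGE_NONE; a caller may still enable
    * auxiliary compression later, as documented above.
    */
   return brw_miptree_create(brw, GL_TEXTURE_2D, MESA_FORMAT_R8G8B8A8_UNORM,
                             0, 0,      /* first_level, last_level */
                             64, 64, 1, /* width0, height0, depth0 */
                             1,         /* num_samples */
                             MIPTREE_CREATE_NO_AUX);
}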
-
-struct brw_mipmap_tree *
-brw_miptree_create_for_bo(struct brw_context *brw,
- struct brw_bo *bo,
- mesa_format format,
- uint32_t offset,
- uint32_t width,
- uint32_t height,
- uint32_t depth,
- int pitch,
- enum isl_tiling tiling,
- enum brw_miptree_create_flags flags);
-
-struct brw_mipmap_tree *
-brw_miptree_create_for_dri_image(struct brw_context *brw,
- __DRIimage *image,
- GLenum target,
- mesa_format format,
- bool allow_internal_aux);
-
-bool
-brw_update_winsys_renderbuffer_miptree(struct brw_context *intel,
- struct brw_renderbuffer *irb,
- struct brw_mipmap_tree *singlesample_mt,
- uint32_t width, uint32_t height,
- uint32_t pitch);
-
-/**
- * Create a miptree appropriate as the storage for a non-texture renderbuffer.
- * The miptree has the following properties:
- * - The target is GL_TEXTURE_2D.
- * - There are no levels other than the base level 0.
- * - Depth is 1.
- */
-struct brw_mipmap_tree*
-brw_miptree_create_for_renderbuffer(struct brw_context *brw,
- mesa_format format,
- uint32_t width,
- uint32_t height,
- uint32_t num_samples);
-
-mesa_format
-brw_depth_format_for_depthstencil_format(mesa_format format);
-
-mesa_format
-brw_lower_compressed_format(struct brw_context *brw, mesa_format format);
-
-unsigned
-brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level);
-
-/** \brief Assert that the level and layer are valid for the miptree. */
-void
-brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt,
- uint32_t level,
- uint32_t layer);
-
-void brw_miptree_reference(struct brw_mipmap_tree **dst,
- struct brw_mipmap_tree *src);
-
-void brw_miptree_release(struct brw_mipmap_tree **mt);
-
-/* Check if an image fits an existing mipmap tree layout
- */
-bool brw_miptree_match_image(struct brw_mipmap_tree *mt,
- struct gl_texture_image *image);
-
-void
-brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt,
- GLuint level, GLuint slice,
- GLuint *x, GLuint *y);
-
-enum isl_surf_dim
-get_isl_surf_dim(GLenum target);
-
-enum isl_dim_layout
-get_isl_dim_layout(const struct intel_device_info *devinfo,
- enum isl_tiling tiling, GLenum target);
-
-void
-brw_get_image_dims(struct gl_texture_image *image,
- int *width, int *height, int *depth);
-
-uint32_t
-brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt,
- GLuint level, GLuint slice,
- uint32_t *tile_x,
- uint32_t *tile_y);
-uint32_t
-brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt,
- uint32_t x, uint32_t y);
-
-void
-brw_miptree_copy_slice(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- unsigned src_level, unsigned src_layer,
- struct brw_mipmap_tree *dst_mt,
- unsigned dst_level, unsigned dst_layer);
-
-void
-brw_miptree_copy_teximage(struct brw_context *brw,
- struct brw_texture_image *brw_image,
- struct brw_mipmap_tree *dst_mt);
-
-/**
- * \name Miptree HiZ functions
- * \{
- *
- * It is safe to call the "slice_set_need_resolve" and "slice_resolve"
- * functions on a miptree without HiZ. In that case, each function is a no-op.
- */
-
-bool
-brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level);
-
-/**\}*/
-
-bool
-brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt,
- unsigned start_level, unsigned num_levels,
- unsigned start_layer, unsigned num_layers);
-
-
-#define INTEL_REMAINING_LAYERS UINT32_MAX
-#define INTEL_REMAINING_LEVELS UINT32_MAX
-
-/** Prepare a miptree for access
- *
- * This function should be called prior to any access to miptree in order to
- * perform any needed resolves.
- *
- * \param[in] start_level The first mip level to be accessed
- *
- * \param[in] num_levels The number of miplevels to be accessed or
- * INTEL_REMAINING_LEVELS to indicate every level
- * above start_level will be accessed
- *
- * \param[in] start_layer The first array slice or 3D layer to be accessed
- *
- * \param[in] num_layers The number of array slices or 3D layers to be
- * accessed or INTEL_REMAINING_LAYERS to indicate
- * every layer above start_layer will be accessed
- *
- * \param[in] aux_usage The type of auxiliary compression the access
- * will be done with; this must be
- * ISL_AUX_USAGE_NONE for uncompressed miptrees
- *
- * \param[in] fast_clear_supported Whether or not the access will support
- * fast clears in the miptree's auxiliary
- * compression format
- */
-void
-brw_miptree_prepare_access(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t start_level, uint32_t num_levels,
- uint32_t start_layer, uint32_t num_layers,
- enum isl_aux_usage aux_usage,
- bool fast_clear_supported);
-
-/** Complete a write operation
- *
- * This function should be called after any operation writes to a miptree.
- * This will update the miptree's compression state so that future resolves
- * happen correctly. Technically, this function can be called before the
- * write occurs but the caller must ensure that they don't interlace
- * brw_miptree_prepare_access and brw_miptree_finish_write calls to
- * overlapping layer/level ranges.
- *
- * \param[in] level The mip level that was written
- *
- * \param[in] start_layer The first array slice or 3D layer written
- *
- * \param[in] num_layers The number of array slices or 3D layers
- * written or INTEL_REMAINING_LAYERS to indicate
- * every layer above start_layer was written
- *
- * \param[in] aux_usage The type of auxiliary compression the write
- * was done with, or ISL_AUX_USAGE_NONE if none
- */
-void
-brw_miptree_finish_write(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t num_layers,
- enum isl_aux_usage aux_usage);
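/* Illustrative usage sketch (not part of the original header): preparing a
 * whole miptree for reads with a given aux usage, and bracketing a write to
 * a single slice with prepare/finish.  The helper name and structure are
 * hypothetical; only the prepare/finish calls follow the declarations above.
 */
static inline void
example_prepare_and_write(struct brw_context *brw, struct brw_mipmap_tree *mt,
                          enum isl_aux_usage aux_usage)
{
   /* Resolve whatever is needed so every level and layer can be read with
    * aux_usage; INTEL_REMAINING_* covers the full mip/array range.
    */
   brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
                              0, INTEL_REMAINING_LAYERS,
                              aux_usage, false);

   /* ... read accesses (e.g. texturing) happen here ... */

   /* A write to level 0, layer 0 is bracketed by prepare/finish so the
    * tracked aux state stays correct for future resolves.
    */
   brw_miptree_prepare_access(brw, mt, 0, 1, 0, 1, aux_usage, false);
   /* ... the write happens here ... */
   brw_miptree_finish_write(brw, mt, 0, 0, 1, aux_usage);
}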
-
-/** Get the auxiliary compression state of a miptree slice */
-enum isl_aux_state
-brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt,
- uint32_t level, uint32_t layer);
-
-/** Set the auxiliary compression state of a miptree slice range
- *
- * This function directly sets the auxiliary compression state of a slice
- * range of a miptree. It only modifies data structures and does not do any
- * resolves. This should only be called by code which directly performs
- * compression operations such as fast clears and resolves. Most code should
- * use brw_miptree_prepare_access or brw_miptree_finish_write.
- */
-void
-brw_miptree_set_aux_state(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t num_layers,
- enum isl_aux_state aux_state);
-
-/**
- * Prepare a miptree for raw access
- *
- * This helper prepares the miptree for access that knows nothing about any
- * sort of compression whatsoever. This is useful when mapping the surface or
- * using it with the blitter.
- */
-static inline void
-brw_miptree_access_raw(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- uint32_t level, uint32_t layer,
- bool write)
-{
- brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
- ISL_AUX_USAGE_NONE, false);
- if (write)
- brw_miptree_finish_write(brw, mt, level, layer, 1, ISL_AUX_USAGE_NONE);
-}
-
-enum isl_aux_usage
-brw_miptree_texture_aux_usage(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format view_format,
- enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits);
-void
-brw_miptree_prepare_texture(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format view_format,
- uint32_t start_level, uint32_t num_levels,
- uint32_t start_layer, uint32_t num_layers,
- enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits);
-void
-brw_miptree_prepare_image(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-enum isl_aux_usage
-brw_miptree_render_aux_usage(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- enum isl_format render_format,
- bool blend_enabled,
- bool draw_aux_disabled);
-void
-brw_miptree_prepare_render(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count,
- enum isl_aux_usage aux_usage);
-void
-brw_miptree_finish_render(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count,
- enum isl_aux_usage aux_usage);
-void
-brw_miptree_prepare_depth(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count);
-void
-brw_miptree_finish_depth(struct brw_context *brw,
- struct brw_mipmap_tree *mt, uint32_t level,
- uint32_t start_layer, uint32_t layer_count,
- bool depth_written);
-void
-brw_miptree_prepare_external(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-void
-brw_miptree_finish_external(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-void
-brw_miptree_make_shareable(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-void
-brw_miptree_updownsample(struct brw_context *brw,
- struct brw_mipmap_tree *src,
- struct brw_mipmap_tree *dst);
-
-void
-brw_update_r8stencil(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-void
-brw_miptree_map(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice,
- unsigned int x,
- unsigned int y,
- unsigned int w,
- unsigned int h,
- GLbitfield mode,
- void **out_ptr,
- ptrdiff_t *out_stride);
-
-void
-brw_miptree_unmap(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- unsigned int level,
- unsigned int slice);
-
-bool
-brw_miptree_sample_with_hiz(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-bool
-brw_miptree_set_clear_color(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- union isl_color_value clear_color);
-
-/* Get a clear color suitable for filling out an ISL surface state. */
-union isl_color_value
-brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt,
- struct brw_bo **clear_color_bo,
- uint64_t *clear_color_offset);
-
-
-static inline int
-brw_miptree_blt_pitch(struct brw_mipmap_tree *mt)
-{
- int pitch = mt->surf.row_pitch_B;
- if (mt->surf.tiling != ISL_TILING_LINEAR)
- pitch /= 4;
- return pitch;
-}
-
-isl_memcpy_type
-brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type,
- uint32_t *cpp);
-
-static inline bool
-brw_miptree_needs_fake_etc(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- bool is_etc = _mesa_is_format_etc2(mt->format) ||
- (mt->format == MESA_FORMAT_ETC1_RGB8);
-
- return devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT && is_etc;
-}
-
-static inline bool
-brw_miptree_has_etc_shadow(struct brw_context *brw,
- struct brw_mipmap_tree *mt)
-{
- return brw_miptree_needs_fake_etc(brw, mt) && mt->shadow_mt;
-}
-
-void
-brw_miptree_update_etc_shadow_levels(struct brw_context *brw,
- struct brw_mipmap_tree *mt);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-
-#include "brw_batch.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-
-#include "main/framebuffer.h"
-#include "main/fbobject.h"
-#include "main/format_utils.h"
-#include "main/glformats.h"
-
-/**
- * Upload pointers to the per-stage state.
- *
- * The state pointers in this packet are all relative to the general state
- * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
- */
-static void
-upload_pipelined_state_pointers(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver == 5) {
- /* Need to flush before changing clip max threads for errata. */
- BEGIN_BATCH(1);
- OUT_BATCH(MI_FLUSH);
- ADVANCE_BATCH();
- }
-
- BEGIN_BATCH(7);
- OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
- OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset);
- if (brw->ff_gs.prog_active)
- OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1);
- else
- OUT_BATCH(0);
- OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1);
- OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset);
- OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset);
- OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset);
- ADVANCE_BATCH();
-
- brw->ctx.NewDriverState |= BRW_NEW_PSP;
-}
-
-static void
-upload_psp_urb_cbs(struct brw_context *brw)
-{
- upload_pipelined_state_pointers(brw);
- brw_upload_urb_fence(brw);
- brw_upload_cs_urb_state(brw);
-}
-
-const struct brw_tracked_state brw_psp_urb_cbs = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_FF_GS_PROG_DATA |
- BRW_NEW_GFX4_UNIT_STATE |
- BRW_NEW_STATE_BASE_ADDRESS |
- BRW_NEW_URB_FENCE,
- },
- .emit = upload_psp_urb_cbs,
-};
-
-uint32_t
-brw_depthbuffer_format(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- struct brw_renderbuffer *drb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_renderbuffer *srb;
-
- if (!drb &&
- (srb = brw_get_renderbuffer(fb, BUFFER_STENCIL)) &&
- !srb->mt->stencil_mt &&
- (brw_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT ||
- brw_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) {
- drb = srb;
- }
-
- if (!drb)
- return BRW_DEPTHFORMAT_D32_FLOAT;
-
- return brw_depth_format(brw, drb->mt->format);
-}
-
-static struct brw_mipmap_tree *
-get_stencil_miptree(struct brw_renderbuffer *irb)
-{
- if (!irb)
- return NULL;
- if (irb->mt->stencil_mt)
- return irb->mt->stencil_mt;
- return brw_renderbuffer_get_mt(irb);
-}
-
-static bool
-rebase_depth_stencil(struct brw_context *brw, struct brw_renderbuffer *irb,
- bool invalidate)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- uint32_t tile_mask_x = 0, tile_mask_y = 0;
-
- isl_get_tile_masks(irb->mt->surf.tiling, irb->mt->cpp,
- &tile_mask_x, &tile_mask_y);
- assert(!brw_miptree_level_has_hiz(irb->mt, irb->mt_level));
-
- uint32_t tile_x = irb->draw_x & tile_mask_x;
- uint32_t tile_y = irb->draw_y & tile_mask_y;
-
- /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
- * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
- * Coordinate Offset X/Y":
- *
- * "The 3 LSBs of both offsets must be zero to ensure correct
- * alignment"
- */
- bool rebase = tile_x & 7 || tile_y & 7;
-
- /* We didn't even have intra-tile offsets before g45. */
- rebase |= (!devinfo->has_surface_tile_offset && (tile_x || tile_y));
-
- if (rebase) {
- perf_debug("HW workaround: blitting depth level %d to a temporary "
- "to fix alignment (depth tile offset %d,%d)\n",
- irb->mt_level, tile_x, tile_y);
- brw_renderbuffer_move_to_temp(brw, irb, invalidate);
-
- /* There is now only a single-slice miptree. */
- brw->depthstencil.tile_x = 0;
- brw->depthstencil.tile_y = 0;
- brw->depthstencil.depth_offset = 0;
- return true;
- }
-
- /* While we just tried to get everything aligned, we may have failed to do
- * so in the case of rendering to array or 3D textures, where nonzero faces
- * will still have an offset post-rebase. At least give an informative
- * warning.
- */
- WARN_ONCE((tile_x & 7) || (tile_y & 7),
- "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
- "Truncating offset (%u:%u), bad rendering may occur.\n",
- tile_x, tile_y);
- tile_x &= ~7;
- tile_y &= ~7;
-
- brw->depthstencil.tile_x = tile_x;
- brw->depthstencil.tile_y = tile_y;
- brw->depthstencil.depth_offset = brw_miptree_get_aligned_offset(
- irb->mt,
- irb->draw_x & ~tile_mask_x,
- irb->draw_y & ~tile_mask_y);
-
- return false;
-}
-
-void
-brw_workaround_depthstencil_alignment(struct brw_context *brw,
- GLbitfield clear_mask)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
- struct brw_mipmap_tree *depth_mt = NULL;
- bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
- bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;
-
- if (depth_irb)
- depth_mt = depth_irb->mt;
-
- /* Initialize brw->depthstencil to 'nop' workaround state.
- */
- brw->depthstencil.tile_x = 0;
- brw->depthstencil.tile_y = 0;
- brw->depthstencil.depth_offset = 0;
-
- /* Gfx6+ doesn't require the workarounds, since we always program the
- * surface state at the start of the whole surface.
- */
- if (devinfo->ver >= 6)
- return;
-
- /* Check if depth buffer is in depth/stencil format. If so, then it's only
- * safe to invalidate it if we're also clearing stencil.
- */
- if (depth_irb && invalidate_depth &&
- _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL)
- invalidate_depth = invalidate_stencil && stencil_irb;
-
- if (depth_irb) {
- if (rebase_depth_stencil(brw, depth_irb, invalidate_depth)) {
- /* In the case of stencil_irb being the same packed depth/stencil
- * texture but not the same rb, make it point at our rebased mt, too.
- */
- if (stencil_irb &&
- stencil_irb != depth_irb &&
- stencil_irb->mt == depth_mt) {
- brw_miptree_reference(&stencil_irb->mt, depth_irb->mt);
- brw_renderbuffer_set_draw_offset(stencil_irb);
- }
- }
-
- if (stencil_irb) {
- assert(stencil_irb->mt == depth_irb->mt);
- assert(stencil_irb->mt_level == depth_irb->mt_level);
- assert(stencil_irb->mt_layer == depth_irb->mt_layer);
- }
- }
-
- /* If there is no depth attachment, consider if stencil needs rebase. */
- if (!depth_irb && stencil_irb)
- rebase_depth_stencil(brw, stencil_irb, invalidate_stencil);
-}
-
-static void
-brw_emit_depth_stencil_hiz(struct brw_context *brw,
- struct brw_renderbuffer *depth_irb,
- struct brw_mipmap_tree *depth_mt,
- struct brw_renderbuffer *stencil_irb,
- struct brw_mipmap_tree *stencil_mt)
-{
- uint32_t tile_x = brw->depthstencil.tile_x;
- uint32_t tile_y = brw->depthstencil.tile_y;
- uint32_t depth_surface_type = BRW_SURFACE_NULL;
- uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
- uint32_t depth_offset = 0;
- uint32_t width = 1, height = 1;
- bool tiled_surface = true;
-
- /* If there's a packed depth/stencil bound to stencil only, we need to
- * emit the packed depth/stencil buffer packet.
- */
- if (!depth_irb && stencil_irb) {
- depth_irb = stencil_irb;
- depth_mt = stencil_mt;
- }
-
- if (depth_irb && depth_mt) {
- depthbuffer_format = brw_depthbuffer_format(brw);
- depth_surface_type = BRW_SURFACE_2D;
- depth_offset = brw->depthstencil.depth_offset;
- width = depth_irb->Base.Base.Width;
- height = depth_irb->Base.Base.Height;
- tiled_surface = depth_mt->surf.tiling != ISL_TILING_LINEAR;
- }
-
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const unsigned len = (devinfo->verx10 == 45 || devinfo->ver == 5) ? 6 : 5;
-
- BEGIN_BATCH(len);
- OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
- OUT_BATCH((depth_mt ? depth_mt->surf.row_pitch_B - 1 : 0) |
- (depthbuffer_format << 18) |
- (BRW_TILEWALK_YMAJOR << 26) |
- (tiled_surface << 27) |
- (depth_surface_type << 29));
-
- if (depth_mt) {
- OUT_RELOC(depth_mt->bo, RELOC_WRITE, depth_offset);
- } else {
- OUT_BATCH(0);
- }
-
- OUT_BATCH(((width + tile_x - 1) << 6) |
- ((height + tile_y - 1) << 19));
- OUT_BATCH(0);
-
- if (devinfo->verx10 >= 45)
- OUT_BATCH(tile_x | (tile_y << 16));
- else
- assert(tile_x == 0 && tile_y == 0);
-
- if (devinfo->ver >= 6)
- OUT_BATCH(0);
-
- ADVANCE_BATCH();
-}
-
-void
-brw_emit_depthbuffer(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- /* _NEW_BUFFERS */
- struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
- struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
- struct brw_mipmap_tree *depth_mt = brw_renderbuffer_get_mt(depth_irb);
- struct brw_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
-
- if (depth_mt)
- brw_cache_flush_for_depth(brw, depth_mt->bo);
- if (stencil_mt)
- brw_cache_flush_for_depth(brw, stencil_mt->bo);
-
- if (devinfo->ver < 6) {
- brw_emit_depth_stencil_hiz(brw, depth_irb, depth_mt,
- stencil_irb, stencil_mt);
- return;
- }
-
- /* Skip repeated NULL depth/stencil emits (think 2D rendering). */
- if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) {
- assert(brw->hw_ctx);
- return;
- }
-
- brw_emit_depth_stall_flushes(brw);
-
- const unsigned ds_dwords = brw->isl_dev.ds.size / 4;
- brw_batch_begin(brw, ds_dwords);
- uint32_t *ds_map = brw->batch.map_next;
- const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map;
-
- struct isl_view view = {
- /* Some nice defaults */
- .base_level = 0,
- .levels = 1,
- .base_array_layer = 0,
- .array_len = 1,
- .swizzle = ISL_SWIZZLE_IDENTITY,
- };
-
- struct isl_depth_stencil_hiz_emit_info info = {
- .view = &view,
- .mocs = brw_mocs(&brw->isl_dev, NULL),
- };
-
- if (depth_mt) {
- view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
- info.depth_surf = &depth_mt->surf;
-
- info.depth_address =
- brw_batch_reloc(&brw->batch,
- ds_offset + brw->isl_dev.ds.depth_offset,
- depth_mt->bo, depth_mt->offset, RELOC_WRITE);
-
- info.mocs = brw_mocs(&brw->isl_dev, depth_mt->bo);
- view.base_level = depth_irb->mt_level - depth_irb->mt->first_level;
- view.base_array_layer = depth_irb->mt_layer;
- view.array_len = MAX2(depth_irb->layer_count, 1);
- view.format = depth_mt->surf.format;
-
- info.hiz_usage = depth_mt->aux_usage;
- if (!brw_renderbuffer_has_hiz(depth_irb)) {
- /* Just because a miptree has ISL_AUX_USAGE_HIZ does not mean that
- * all miplevels of that miptree are guaranteed to support HiZ. See
- * brw_miptree_level_enable_hiz for details.
- */
- info.hiz_usage = ISL_AUX_USAGE_NONE;
- }
-
- if (info.hiz_usage == ISL_AUX_USAGE_HIZ) {
- info.hiz_surf = &depth_mt->aux_buf->surf;
-
- uint64_t hiz_offset = 0;
- if (devinfo->ver == 6) {
- /* HiZ surfaces on Sandy Bridge technically don't support
- * mip-mapping. However, we can fake it by offsetting to the
- * first slice of LOD0 in the HiZ surface.
- */
- isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf,
- view.base_level, 0, 0,
- &hiz_offset, NULL, NULL);
- }
-
- info.hiz_address =
- brw_batch_reloc(&brw->batch,
- ds_offset + brw->isl_dev.ds.hiz_offset,
- depth_mt->aux_buf->bo,
- depth_mt->aux_buf->offset + hiz_offset,
- RELOC_WRITE);
- }
-
- info.depth_clear_value = depth_mt->fast_clear_color.f32[0];
- }
-
- if (stencil_mt) {
- view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
- info.stencil_surf = &stencil_mt->surf;
-
- if (!depth_mt) {
- info.mocs = brw_mocs(&brw->isl_dev, stencil_mt->bo);
- view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level;
- view.base_array_layer = stencil_irb->mt_layer;
- view.array_len = MAX2(stencil_irb->layer_count, 1);
- view.format = stencil_mt->surf.format;
- }
-
- uint64_t stencil_offset = 0;
- if (devinfo->ver == 6) {
- /* Stencil surfaces on Sandy Bridge technically don't support
- * mip-mapping. However, we can fake it by offsetting to the
- * first slice of LOD0 in the stencil surface.
- */
- isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf,
- view.base_level, 0, 0,
- &stencil_offset, NULL, NULL);
- }
-
- info.stencil_address =
- brw_batch_reloc(&brw->batch,
- ds_offset + brw->isl_dev.ds.stencil_offset,
- stencil_mt->bo,
- stencil_mt->offset + stencil_offset,
- RELOC_WRITE);
- }
-
- isl_emit_depth_stencil_hiz_s(&brw->isl_dev, ds_map, &info);
-
- brw->batch.map_next += ds_dwords;
- brw_batch_advance(brw);
-
- brw->no_depth_or_stencil = !depth_mt && !stencil_mt;
-}
-
-const struct brw_tracked_state brw_depthbuffer = {
- .dirty = {
- .mesa = _NEW_BUFFERS,
- .brw = BRW_NEW_AUX_STATE |
- BRW_NEW_BATCH |
- BRW_NEW_BLORP,
- },
- .emit = brw_emit_depthbuffer,
-};
-
-void
-brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const bool is_965 = devinfo->verx10 == 40;
- const uint32_t _3DSTATE_PIPELINE_SELECT =
- is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
-
- if (devinfo->ver >= 8 && devinfo->ver < 10) {
- /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
- *
- * Software must clear the COLOR_CALC_STATE Valid field in
- * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
- * with Pipeline Select set to GPGPU.
- *
- * The internal hardware docs recommend the same workaround for Gfx9
- * hardware too.
- */
- if (pipeline == BRW_COMPUTE_PIPELINE) {
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
- OUT_BATCH(0);
- ADVANCE_BATCH();
-
- brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
- }
- }
-
- if (devinfo->ver == 9 && pipeline == BRW_RENDER_PIPELINE) {
- /* We seem to have issues with geometry flickering when 3D and compute
- * are combined in the same batch and this appears to fix it.
- */
- const uint32_t maxNumberofThreads =
- devinfo->max_cs_threads * devinfo->subslice_total - 1;
-
- BEGIN_BATCH(9);
- OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(2 << 8 | maxNumberofThreads << 16);
- OUT_BATCH(0);
- OUT_BATCH(2 << 16);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-
- if (devinfo->ver >= 6) {
- /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
- * PIPELINE_SELECT [DevBWR+]":
- *
- * Project: DEVSNB+
- *
- * Software must ensure all the write caches are flushed through a
- * stalling PIPE_CONTROL command followed by another PIPE_CONTROL
- * command to invalidate read only caches prior to programming
- * MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
- */
- const unsigned dc_flush =
- devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
-
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- dc_flush |
- PIPE_CONTROL_CS_STALL);
-
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE |
- PIPE_CONTROL_STATE_CACHE_INVALIDATE |
- PIPE_CONTROL_INSTRUCTION_INVALIDATE);
-
- } else {
- /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
- * PIPELINE_SELECT [DevBWR+]":
- *
- * Project: PRE-DEVSNB
- *
- * Software must ensure the current pipeline is flushed via an
- * MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
- */
- BEGIN_BATCH(1);
- OUT_BATCH(MI_FLUSH);
- ADVANCE_BATCH();
- }
-
- /* Select the pipeline */
- BEGIN_BATCH(1);
- OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
- (devinfo->ver >= 9 ? (3 << 8) : 0) |
- (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
- ADVANCE_BATCH();
-
- if (devinfo->verx10 == 70 &&
- pipeline == BRW_RENDER_PIPELINE) {
- /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
- * PIPELINE_SELECT [DevBWR+]":
- *
- * Project: DEVIVB, DEVHSW:GT3:A0
- *
- * Software must send a pipe_control with a CS stall and a post sync
- * operation and then a dummy DRAW after every MI_SET_CONTEXT and
- * after any PIPELINE_SELECT that is enabling 3D mode.
- */
- gfx7_emit_cs_stall_flush(brw);
-
- BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
- OUT_BATCH(_3DPRIM_POINTLIST);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-
- if (devinfo->platform == INTEL_PLATFORM_GLK) {
- /* Project: DevGLK
- *
- * "This chicken bit works around a hardware issue with barrier logic
- * encountered when switching between GPGPU and 3D pipelines. To
- * workaround the issue, this mode bit should be set after a pipeline
- * is selected."
- */
- const unsigned barrier_mode =
- pipeline == BRW_RENDER_PIPELINE ? GLK_SCEC_BARRIER_MODE_3D_HULL
- : GLK_SCEC_BARRIER_MODE_GPGPU;
- brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
- barrier_mode | GLK_SCEC_BARRIER_MODE_MASK);
- }
-}
-
-/**
- * Update the pixel hashing modes that determine the balancing of PS threads
- * across subslices and slices.
- *
- * \param width Width bound of the rendering area (already scaled down if \p
- * scale is greater than 1).
- * \param height Height bound of the rendering area (already scaled down if \p
- * scale is greater than 1).
- * \param scale The number of framebuffer samples that could potentially be
- * affected by an individual channel of the PS thread. This is
- * typically one for single-sampled rendering, but for operations
- * like CCS resolves and fast clears a single PS invocation may
- * update a huge number of pixels, in which case a finer
- * balancing is desirable in order to maximally utilize the
- * bandwidth available. UINT_MAX can be used as shorthand for
- * "finest hashing mode available".
- */
-void
-brw_emit_hashing_mode(struct brw_context *brw, unsigned width,
- unsigned height, unsigned scale)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver == 9) {
- const uint32_t slice_hashing[] = {
- /* Because all Gfx9 platforms with more than one slice require
- * three-way subslice hashing, a single "normal" 16x16 slice hashing
- * block is guaranteed to suffer from substantial imbalance, with one
- * subslice receiving twice as much work as the other two in the
- * slice.
- *
- * The performance impact of that would be particularly severe when
- * three-way hashing is also in use for slice balancing (which is the
- * case for all Gfx9 GT4 platforms), because one of the slices
- * receives one every three 16x16 blocks in either direction, which
- * is roughly the periodicity of the underlying subslice imbalance
- * pattern ("roughly" because in reality the hardware's
- * implementation of three-way hashing doesn't do exact modulo 3
- * arithmetic, which somewhat decreases the magnitude of this effect
- * in practice). This leads to a systematic subslice imbalance
- * within that slice regardless of the size of the primitive. The
- * 32x32 hashing mode guarantees that the subslice imbalance within a
- * single slice hashing block is minimal, largely eliminating this
- * effect.
- */
- GFX9_SLICE_HASHING_32x32,
- /* Finest slice hashing mode available. */
- GFX9_SLICE_HASHING_NORMAL
- };
- const uint32_t subslice_hashing[] = {
- /* The 16x16 subslice hashing mode is used on non-LLC platforms to
- * match the performance of previous Mesa versions. 16x16 has a
- * slight cache locality benefit especially visible in the sampler L1
- * cache efficiency of low-bandwidth platforms, but it comes at the
- * cost of greater subslice imbalance for primitives of dimensions
- * approximately intermediate between 16x4 and 16x16.
- */
- (devinfo->has_llc ? GFX9_SUBSLICE_HASHING_16x4 :
- GFX9_SUBSLICE_HASHING_16x16),
- /* Finest subslice hashing mode available. */
- GFX9_SUBSLICE_HASHING_8x4
- };
- /* Dimensions of the smallest hashing block of a given hashing mode. If
- * the rendering area is smaller than this there can't possibly be any
- * benefit from switching to this mode, so we optimize out the
- * transition.
- */
- const unsigned min_size[][2] = {
- { 16, 4 },
- { 8, 4 }
- };
- const unsigned idx = scale > 1;
-
- if (width > min_size[idx][0] || height > min_size[idx][1]) {
- const uint32_t gt_mode =
- (devinfo->num_slices == 1 ? 0 :
- GFX9_SLICE_HASHING_MASK_BITS | slice_hashing[idx]) |
- GFX9_SUBSLICE_HASHING_MASK_BITS | subslice_hashing[idx];
-
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_CS_STALL);
-
- brw_load_register_imm32(brw, GFX7_GT_MODE, gt_mode);
-
- brw->current_hash_scale = scale;
- }
- }
-}
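/* Hypothetical call-site sketch (not from this file): a regular draw uses
 * scale == 1 over the drawable bounds, while an operation that writes many
 * pixels per PS invocation asks for the finest hashing mode, per the
 * documentation above.  Variable names are placeholders.
 */
void
example_hashing_calls(struct brw_context *brw, unsigned fb_width,
                      unsigned fb_height)
{
   /* Ordinary single-sampled rendering. */
   brw_emit_hashing_mode(brw, fb_width, fb_height, 1);

   /* A large fast clear or CCS resolve: request the finest mode available
    * and pass bounds large enough that the transition is never optimized out.
    */
   brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, UINT_MAX);
}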
-
-/**
- * Misc invariant state packets
- */
-void
-brw_upload_invariant_state(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const bool is_965 = devinfo->verx10 == 40;
-
- brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
- brw->last_pipeline = BRW_RENDER_PIPELINE;
-
- if (devinfo->ver >= 8) {
- BEGIN_BATCH(3);
- OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(2);
- OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-
- /* Original Gfx4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */
- if (!is_965) {
- BEGIN_BATCH(3);
- OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
- /* use legacy aa line coverage computation */
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-}
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_MULTISAMPLE_STATE_H
-#define BRW_MULTISAMPLE_STATE_H
-
-#include <stdint.h>
-
-/**
- * Note: There are no standard multisample positions defined in OpenGL
- * specifications. Implementations have the freedom to pick the positions
- * which give plausible results. But the Vulkan specification does define
- * standard sample positions. So, we decided to pick the same pattern in
- * OpenGL as in Vulkan to keep it uniform across drivers and also to avoid
- * breaking applications which rely on this standard pattern.
- */
-
-/**
- * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
- *
- * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25):
- *   4 c
- * 4 1
- * c   0
- */
-static const uint32_t
-brw_multisample_positions_1x_2x = 0x008844cc;
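/* Decoding sketch (illustrative, not part of the original header): in these
 * packed constants each sample occupies one byte, sample 0 in the least
 * significant byte, with the X offset in the high nibble and the Y offset in
 * the low nibble, both in 1/16-pixel units.  For 0x008844cc this gives the
 * 2x samples (0xc, 0xc) = (0.75, 0.75) and (0x4, 0x4) = (0.25, 0.25), and
 * the 1x sample (0x8, 0x8) = (0.5, 0.5).
 */
static inline void
example_decode_sample_position(uint32_t packed, unsigned sample,
                               float *x, float *y)
{
   const uint32_t byte = (packed >> (8 * sample)) & 0xff;
   *x = (float)(byte >> 4) / 16.0f;
   *y = (float)(byte & 0xf) / 16.0f;
}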
-
-/**
- * Sample positions:
- *   2 6 a e
- * 2   0
- * 6       1
- * a 2
- * e     3
- */
-static const uint32_t
-brw_multisample_positions_4x = 0xae2ae662;
-
-/**
- * Sample positions:
- *
- * From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE:
- * Programming Notes):
- * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
- * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7
- * for 8X) must have monotonically increasing distance from the
- * pixel center. This is required to get the correct centroid
- * computation in the device."
- *
- * Sample positions:
- *   1 3 5 7 9 b d f
- * 1               7
- * 3     3
- * 5         0
- * 7 5
- * 9             2
- * b       1
- * d   4
- * f           6
- */
-static const uint32_t
-brw_multisample_positions_8x[] = { 0x53d97b95, 0xf1bf173d };
-
-/**
- * Sample positions:
- *
- *   0 1 2 3 4 5 6 7 8 9 a b c d e f
- * 0   15
- * 1                 9
- * 2         10
- * 3                       7
- * 4                               13
- * 5               1
- * 6       4
- * 7                         3
- * 8 12
- * 9                   0
- * a           2
- * b                           6
- * c     11
- * d                     5
- * e             8
- * f                             14
- */
-static const uint32_t
-brw_multisample_positions_16x[] = {
- 0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408
-};
-
-#endif /* BRW_MULTISAMPLE_STATE_H */
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "compiler/brw_nir.h"
-#include "compiler/glsl/ir_uniform.h"
-#include "compiler/nir/nir_builder.h"
-#include "brw_program.h"
-
-static void
-brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
- const struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- bool is_scalar)
-{
- const nir_state_slot *const slots = var->state_slots;
- assert(var->state_slots != NULL);
-
- unsigned uniform_index = var->data.driver_location / 4;
- for (unsigned int i = 0; i < var->num_state_slots; i++) {
- /* This state reference has already been setup by ir_to_mesa, but we'll
- * get the same index back here.
- */
- int index = _mesa_add_state_reference(prog->Parameters,
- slots[i].tokens);
-
- /* Add each of the unique swizzles of the element as a parameter.
- * This'll end up matching the expected layout of the
- * array/matrix/structure we're trying to fill in.
- */
- int last_swiz = -1;
- for (unsigned j = 0; j < 4; j++) {
- int swiz = GET_SWZ(slots[i].swizzle, j);
-
- /* If we hit a pair of identical swizzles, this means we've hit the
- * end of the builtin variable. In scalar mode, we should just quit
- * and move on to the next one. In vec4, we need to continue and pad
- * it out to 4 components.
- */
- if (swiz == last_swiz && is_scalar)
- break;
-
- last_swiz = swiz;
-
- stage_prog_data->param[uniform_index++] =
- BRW_PARAM_PARAMETER(index, swiz);
- }
- }
-}
-
-static void
-setup_vec4_image_param(uint32_t *params, uint32_t idx,
- unsigned offset, unsigned n)
-{
- assert(offset % sizeof(uint32_t) == 0);
- for (unsigned i = 0; i < n; ++i)
- params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
-
- for (unsigned i = n; i < 4; ++i)
- params[i] = BRW_PARAM_BUILTIN_ZERO;
-}
-
-static void
-brw_setup_image_uniform_values(nir_variable *var,
- struct brw_stage_prog_data *prog_data)
-{
- unsigned param_start_index = var->data.driver_location / 4;
- uint32_t *param = &prog_data->param[param_start_index];
- unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
-
- for (unsigned i = 0; i < num_images; i++) {
- const unsigned image_idx = var->data.binding + i;
-
- /* Upload the brw_image_param structure. The order is expected to match
- * the BRW_IMAGE_PARAM_*_OFFSET defines.
- */
- setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
- image_idx,
- offsetof(brw_image_param, offset), 2);
- setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
- image_idx,
- offsetof(brw_image_param, size), 3);
- setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
- image_idx,
- offsetof(brw_image_param, stride), 4);
- setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET,
- image_idx,
- offsetof(brw_image_param, tiling), 3);
- setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
- image_idx,
- offsetof(brw_image_param, swizzling), 2);
- param += BRW_IMAGE_PARAM_SIZE;
- }
-}
-
-static unsigned
-count_uniform_storage_slots(const struct glsl_type *type)
-{
- /* gl_uniform_storage can cope with one level of array, so if the
- * type is a composite type or an array where each element occupies
- * more than one slot, then we need to recursively process it.
- */
- if (glsl_type_is_struct_or_ifc(type)) {
- unsigned location_count = 0;
-
- for (unsigned i = 0; i < glsl_get_length(type); i++) {
- const struct glsl_type *field_type = glsl_get_struct_field(type, i);
-
- location_count += count_uniform_storage_slots(field_type);
- }
-
- return location_count;
- }
-
- if (glsl_type_is_array(type)) {
- const struct glsl_type *element_type = glsl_get_array_element(type);
-
- if (glsl_type_is_array(element_type) ||
- glsl_type_is_struct_or_ifc(element_type)) {
- unsigned element_count = count_uniform_storage_slots(element_type);
- return element_count * glsl_get_length(type);
- }
- }
-
- return 1;
-}
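/* Worked example (illustrative, assuming a hypothetical GLSL uniform):
 *
 *    struct { float f[3][2]; mat4 m; vec4 v; } u;
 *
 * The struct is walked field by field:
 *    f: outer array of float[2]  -> count(float[2]) * 3 = 1 * 3 = 3
 *    m: mat4 (not struct/array)  -> 1
 *    v: vec4                     -> 1
 * so count_uniform_storage_slots() returns 5, one slot per
 * gl_uniform_storage entry that glGetUniformLocation() could name.
 */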
-
-static void
-brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
- const struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- bool is_scalar)
-{
- if (var->type->without_array()->is_sampler() ||
- var->type->without_array()->is_image())
- return;
-
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the same
- * order we'd walk the type, so walk the list of storage that matches the
- * range of slots covered by this variable.
- */
- unsigned uniform_index = var->data.driver_location / 4;
- unsigned num_slots = count_uniform_storage_slots(var->type);
- for (unsigned u = 0; u < num_slots; u++) {
- struct gl_uniform_storage *storage =
- &prog->sh.data->UniformStorage[var->data.location + u];
-
- /* We already handled samplers and images via the separate top-level
- * variables created by gl_nir_lower_samplers_as_deref(), but they're
- * still part of the structure's storage, and so we'll see them while
- * walking it to set up the other regular fields. Just skip over them.
- */
- if (storage->builtin ||
- storage->type->is_sampler() ||
- storage->type->is_image())
- continue;
-
- gl_constant_value *components = storage->storage;
- unsigned vector_count = (MAX2(storage->array_elements, 1) *
- storage->type->matrix_columns);
- unsigned vector_size = storage->type->vector_elements;
- unsigned max_vector_size = 4;
- if (storage->type->base_type == GLSL_TYPE_DOUBLE ||
- storage->type->base_type == GLSL_TYPE_UINT64 ||
- storage->type->base_type == GLSL_TYPE_INT64) {
- vector_size *= 2;
- if (vector_size > 4)
- max_vector_size = 8;
- }
-
- for (unsigned s = 0; s < vector_count; s++) {
- unsigned i;
- for (i = 0; i < vector_size; i++) {
- uint32_t idx = components - prog->sh.data->UniformDataSlots;
- stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx);
- components++;
- }
-
- if (!is_scalar) {
- /* Pad out with zeros if needed (only needed for vec4) */
- for (; i < max_vector_size; i++) {
- stage_prog_data->param[uniform_index++] =
- BRW_PARAM_BUILTIN_ZERO;
- }
- }
- }
- }
-}
-
-void
-brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
- const struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- bool is_scalar)
-{
- unsigned nr_params = shader->num_uniforms / 4;
- stage_prog_data->nr_params = nr_params;
- stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
-
- nir_foreach_uniform_variable(var, shader) {
- /* UBOs, atomics and samplers don't take up space in the
- uniform file */
- if (var->interface_type != NULL || var->type->contains_atomic())
- continue;
-
- if (var->num_state_slots > 0) {
- brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
- is_scalar);
- } else {
- brw_nir_setup_glsl_uniform(shader->info.stage, var, prog,
- stage_prog_data, is_scalar);
- }
- }
-
- nir_foreach_image_variable(var, shader)
- brw_setup_image_uniform_values(var, stage_prog_data);
-}
-
-void
-brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data)
-{
- struct gl_program_parameter_list *plist = prog->Parameters;
-
- unsigned nr_params = plist->NumParameters * 4;
- stage_prog_data->nr_params = nr_params;
- stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
-
- /* For ARB programs, prog_to_nir generates a single "parameters" variable
- * for all uniform data. There may be additional sampler variables, and
- * an extra uniform from nir_lower_wpos_ytransform.
- */
-
- for (unsigned p = 0; p < plist->NumParameters; p++) {
- /* Parameters should be either vec4 uniforms or single component
- * constants; matrices and other larger types should have been broken
- * down earlier.
- */
- assert(plist->Parameters[p].Size <= 4);
-
- unsigned i;
- for (i = 0; i < plist->Parameters[p].Size; i++)
- stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i);
- for (; i < 4; i++)
- stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
- }
-}
-
-static nir_ssa_def *
-get_aoa_deref_offset(nir_builder *b,
- nir_deref_instr *deref,
- unsigned elem_size)
-{
- unsigned array_size = elem_size;
- nir_ssa_def *offset = nir_imm_int(b, 0);
-
- while (deref->deref_type != nir_deref_type_var) {
- assert(deref->deref_type == nir_deref_type_array);
-
- /* This level's element size is the previous level's array size */
- nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
- assert(deref->arr.index.ssa);
- offset = nir_iadd(b, offset,
- nir_imul(b, index, nir_imm_int(b, array_size)));
-
- deref = nir_deref_instr_parent(deref);
- assert(glsl_type_is_array(deref->type));
- array_size *= glsl_get_length(deref->type);
- }
-
- /* Accessing an invalid surface index with the dataport can result in a
- * hang. According to the spec "if the index used to select an individual
- * element is negative or greater than or equal to the size of the array,
- * the results of the operation are undefined but may not lead to
- * termination" -- which is one of the possible outcomes of the hang.
- * Clamp the index to prevent access outside of the array bounds.
- */
- return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
-}
-
-void
-brw_nir_lower_gl_images(nir_shader *shader,
- const struct gl_program *prog)
-{
- /* We put image uniforms at the end */
- nir_foreach_image_variable(var, shader) {
- const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
-
- var->data.driver_location = shader->num_uniforms;
- shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
- }
-
- nir_function_impl *impl = nir_shader_get_entrypoint(shader);
-
- nir_builder b;
- nir_builder_init(&b, impl);
-
- nir_foreach_block(block, impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- switch (intrin->intrinsic) {
- case nir_intrinsic_image_deref_load:
- case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- case nir_intrinsic_image_deref_size:
- case nir_intrinsic_image_deref_samples:
- case nir_intrinsic_image_deref_load_raw_intel:
- case nir_intrinsic_image_deref_store_raw_intel: {
- nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
- nir_variable *var = nir_deref_instr_get_variable(deref);
-
- struct gl_uniform_storage *storage =
- &prog->sh.data->UniformStorage[var->data.location];
- const unsigned image_var_idx =
- storage->opaque[shader->info.stage].index;
-
- b.cursor = nir_before_instr(&intrin->instr);
- nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
- get_aoa_deref_offset(&b, deref, 1));
- nir_rewrite_image_intrinsic(intrin, index, false);
- break;
- }
-
- case nir_intrinsic_image_deref_load_param_intel: {
- nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
- nir_variable *var = nir_deref_instr_get_variable(deref);
- const unsigned num_images =
- MAX2(1, var->type->arrays_of_arrays_size());
-
- b.cursor = nir_instr_remove(&intrin->instr);
-
- const unsigned param = nir_intrinsic_base(intrin);
- nir_ssa_def *offset =
- get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
- offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
-
- nir_intrinsic_instr *load =
- nir_intrinsic_instr_create(b.shader,
- nir_intrinsic_load_uniform);
- nir_intrinsic_set_base(load, var->data.driver_location);
- nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
- load->src[0] = nir_src_for_ssa(offset);
- load->num_components = intrin->dest.ssa.num_components;
- nir_ssa_dest_init(&load->instr, &load->dest,
- intrin->dest.ssa.num_components,
- intrin->dest.ssa.bit_size, NULL);
- nir_builder_instr_insert(&b, &load->instr);
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- &load->dest.ssa);
- break;
- }
-
- default:
- break;
- }
- }
- }
-}
-
-void
-brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts,
- struct brw_stage_prog_data *prog_data)
-{
- if (nr_userclip_plane_consts == 0)
- return;
-
- nir_function_impl *impl = nir_shader_get_entrypoint(nir);
-
- nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false,
- NULL);
- nir_lower_io_to_temporaries(nir, impl, true, false);
- nir_lower_global_vars_to_local(nir);
- nir_lower_vars_to_ssa(nir);
-
- const unsigned clip_plane_base = nir->num_uniforms;
-
- assert(nir->num_uniforms == prog_data->nr_params * 4);
- const unsigned num_clip_floats = 4 * nr_userclip_plane_consts;
- uint32_t *clip_param =
- brw_stage_prog_data_add_params(prog_data, num_clip_floats);
- nir->num_uniforms += num_clip_floats * sizeof(float);
- assert(nir->num_uniforms == prog_data->nr_params * 4);
-
- for (unsigned i = 0; i < num_clip_floats; i++)
- clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4);
-
- nir_builder b;
- nir_builder_init(&b, impl);
- nir_foreach_block(block, impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
- continue;
-
- b.cursor = nir_before_instr(instr);
-
- nir_intrinsic_instr *load =
- nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
- load->num_components = 4;
- load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
- nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) *
- nir_intrinsic_ucp_id(intrin));
- nir_intrinsic_set_range(load, 4 * sizeof(float));
- nir_builder_instr_insert(&b, &load->instr);
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- &load->dest.ssa);
- nir_instr_remove(instr);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file brw_object_purgeable.c
- *
- * The driver implementation of the GL_APPLE_object_purgeable extension.
- */
-
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/bufferobj.h"
-
-#include "brw_context.h"
-#include "brw_buffer_objects.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-
-static GLenum
-brw_buffer_purgeable(struct brw_bo *buffer)
-{
- int retained = 0;
-
- if (buffer != NULL)
- retained = brw_bo_madvise(buffer, I915_MADV_DONTNEED);
-
- return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
-}
-
-static GLenum
-brw_buffer_object_purgeable(struct gl_context * ctx,
- struct gl_buffer_object *obj,
- GLenum option)
-{
- struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
- if (intel_obj->buffer != NULL)
- return brw_buffer_purgeable(intel_obj->buffer);
-
- if (option == GL_RELEASED_APPLE) {
- return GL_RELEASED_APPLE;
- } else {
- /* XXX Create the buffer and madvise(MADV_DONTNEED)? */
- return brw_buffer_purgeable(intel_obj->buffer);
- }
-}
-
-static GLenum
-brw_texture_object_purgeable(struct gl_context * ctx,
- struct gl_texture_object *obj,
- GLenum option)
-{
- struct brw_texture_object *intel;
-
- (void) ctx;
- (void) option;
-
- intel = brw_texture_object(obj);
- if (intel->mt == NULL || intel->mt->bo == NULL)
- return GL_RELEASED_APPLE;
-
- return brw_buffer_purgeable(intel->mt->bo);
-}
-
-static GLenum
-brw_render_object_purgeable(struct gl_context * ctx,
- struct gl_renderbuffer *obj,
- GLenum option)
-{
- struct brw_renderbuffer *intel;
-
- (void) ctx;
- (void) option;
-
- intel = brw_renderbuffer(obj);
- if (intel->mt == NULL)
- return GL_RELEASED_APPLE;
-
- return brw_buffer_purgeable(intel->mt->bo);
-}
-
-static int
-brw_bo_unpurgeable(struct brw_bo *buffer)
-{
- int retained;
-
- retained = 0;
- if (buffer != NULL)
- retained = brw_bo_madvise(buffer, I915_MADV_WILLNEED);
-
- return retained;
-}
-
-static GLenum
-brw_buffer_object_unpurgeable(struct gl_context * ctx,
- struct gl_buffer_object *obj,
- GLenum option)
-{
- struct brw_buffer_object *intel = brw_buffer_object(obj);
-
- (void) ctx;
-
- if (!intel->buffer)
- return GL_UNDEFINED_APPLE;
-
- if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->buffer)) {
- brw_bo_unreference(intel->buffer);
- intel->buffer = NULL;
- return GL_UNDEFINED_APPLE;
- }
-
- return GL_RETAINED_APPLE;
-}
-
-static GLenum
-brw_texture_object_unpurgeable(struct gl_context * ctx,
- struct gl_texture_object *obj,
- GLenum option)
-{
- struct brw_texture_object *intel;
-
- (void) ctx;
-
- intel = brw_texture_object(obj);
- if (intel->mt == NULL || intel->mt->bo == NULL)
- return GL_UNDEFINED_APPLE;
-
- if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) {
- brw_miptree_release(&intel->mt);
- return GL_UNDEFINED_APPLE;
- }
-
- return GL_RETAINED_APPLE;
-}
-
-static GLenum
-brw_render_object_unpurgeable(struct gl_context * ctx,
- struct gl_renderbuffer *obj,
- GLenum option)
-{
- struct brw_renderbuffer *intel;
-
- (void) ctx;
-
- intel = brw_renderbuffer(obj);
- if (intel->mt == NULL)
- return GL_UNDEFINED_APPLE;
-
- if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) {
- brw_miptree_release(&intel->mt);
- return GL_UNDEFINED_APPLE;
- }
-
- return GL_RETAINED_APPLE;
-}
-
-void
-brw_init_object_purgeable_functions(struct dd_function_table *functions)
-{
- functions->BufferObjectPurgeable = brw_buffer_object_purgeable;
- functions->TextureObjectPurgeable = brw_texture_object_purgeable;
- functions->RenderObjectPurgeable = brw_render_object_purgeable;
-
- functions->BufferObjectUnpurgeable = brw_buffer_object_unpurgeable;
- functions->TextureObjectUnpurgeable = brw_texture_object_unpurgeable;
- functions->RenderObjectUnpurgeable = brw_render_object_unpurgeable;
-}
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_performance_query.c
- *
- * Implementation of the GL_INTEL_performance_query extension.
- *
- * Currently there are two possible counter sources exposed here:
- *
- * On Gfx6+ hardware we have numerous 64bit Pipeline Statistics Registers
- * that we can snapshot at the beginning and end of a query.
- *
- * On Gfx7.5+ we have Observability Architecture counters which are
- * covered in a separate document from the rest of the PRMs. It is available at:
- * https://01.org/linuxgraphics/documentation/driver-documentation-prms
- * => 2013 Intel Core Processor Family => Observability Performance Counters
- * (This one volume covers Sandybridge, Ivybridge, Baytrail, and Haswell,
- * though notably we currently only support OA counters for Haswell+)
- */
-
-#include <limits.h>
-
-/* put before sys/types.h to silence glibc warnings */
-#ifdef MAJOR_IN_MKDEV
-#include <sys/mkdev.h>
-#endif
-#ifdef MAJOR_IN_SYSMACROS
-#include <sys/sysmacros.h>
-#endif
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-
-#include <xf86drm.h>
-#include "drm-uapi/i915_drm.h"
-
-#include "main/hash.h"
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/performance_query.h"
-
-#include "util/bitset.h"
-#include "util/ralloc.h"
-#include "util/hash_table.h"
-#include "util/list.h"
-#include "util/u_math.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-#include "perf/intel_perf.h"
-#include "perf/intel_perf_regs.h"
-#include "perf/intel_perf_mdapi.h"
-#include "perf/intel_perf_query.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PERFMON
-
-#define OAREPORT_REASON_MASK 0x3f
-#define OAREPORT_REASON_SHIFT 19
-#define OAREPORT_REASON_TIMER (1<<0)
-#define OAREPORT_REASON_TRIGGER1 (1<<1)
-#define OAREPORT_REASON_TRIGGER2 (1<<2)
-#define OAREPORT_REASON_CTX_SWITCH (1<<3)
-#define OAREPORT_REASON_GO_TRANSITION (1<<4)
-
-struct brw_perf_query_object {
- struct gl_perf_query_object base;
- struct intel_perf_query_object *query;
-};
-
-/** Downcasting convenience macro. */
-static inline struct brw_perf_query_object *
-brw_perf_query(struct gl_perf_query_object *o)
-{
- return (struct brw_perf_query_object *) o;
-}
-
-#define MI_RPC_BO_SIZE 4096
-#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2)
-#define MI_FREQ_START_OFFSET_BYTES (3072)
-#define MI_FREQ_END_OFFSET_BYTES (3076)
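The layout of the 4096-byte MI_RPC buffer implied by these offsets (illustrative; the begin-snapshot offset of 0 and the region sizes are assumptions, not stated in the source):

/* offset    0 : OA report snapshot at query begin  (assumed)
 * offset 2048 : OA report snapshot at query end    (MI_RPC_BO_END_OFFSET_BYTES)
 * offset 3072 : 32-bit GT frequency at begin       (MI_FREQ_START_OFFSET_BYTES)
 * offset 3076 : 32-bit GT frequency at end         (MI_FREQ_END_OFFSET_BYTES)
 */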
-
-/******************************************************************************/
-
-static bool
-brw_is_perf_query_ready(struct gl_context *ctx,
- struct gl_perf_query_object *o);
-
-static void
-dump_perf_query_callback(void *query_void, void *brw_void)
-{
- struct brw_context *ctx = brw_void;
- struct intel_perf_context *perf_ctx = ctx->perf_ctx;
- struct gl_perf_query_object *o = query_void;
- struct brw_perf_query_object * brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
-
- DBG("%4d: %-6s %-8s ",
- o->Id,
- o->Used ? "Dirty," : "New,",
- o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
- intel_perf_dump_query(perf_ctx, obj, &ctx->batch);
-}
-
-static void
-dump_perf_queries(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- intel_perf_dump_query_count(brw->perf_ctx);
- _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw);
-}
-
-/**
- * Driver hook for glGetPerfQueryInfoINTEL().
- */
-static void
-brw_get_perf_query_info(struct gl_context *ctx,
- unsigned query_index,
- const char **name,
- GLuint *data_size,
- GLuint *n_counters,
- GLuint *n_active)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_perf_context *perf_ctx = brw->perf_ctx;
- struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
- const struct intel_perf_query_info *query = &perf_cfg->queries[query_index];
-
- *name = query->name;
- *data_size = query->data_size;
- *n_counters = query->n_counters;
- *n_active = intel_perf_active_queries(perf_ctx, query);
-}
-
-static GLuint
-intel_counter_type_enum_to_gl_type(enum intel_perf_counter_type type)
-{
- switch (type) {
- case INTEL_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL;
- case INTEL_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL;
- case INTEL_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL;
- case INTEL_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
- case INTEL_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL;
- case INTEL_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL;
- default:
- unreachable("Unknown counter type");
- }
-}
-
-static GLuint
-intel_counter_data_type_to_gl_type(enum intel_perf_counter_data_type type)
-{
- switch (type) {
- case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL;
- case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
- case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
- case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
- case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: return GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL;
- default:
- unreachable("Unknown counter data type");
- }
-}
-
-/**
- * Driver hook for glGetPerfCounterInfoINTEL().
- */
-static void
-brw_get_perf_counter_info(struct gl_context *ctx,
- unsigned query_index,
- unsigned counter_index,
- const char **name,
- const char **desc,
- GLuint *offset,
- GLuint *data_size,
- GLuint *type_enum,
- GLuint *data_type_enum,
- GLuint64 *raw_max)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_perf_config *perf_cfg = intel_perf_config(brw->perf_ctx);
- const struct intel_perf_query_info *query =
- &perf_cfg->queries[query_index];
- const struct intel_perf_query_counter *counter =
- &query->counters[counter_index];
-
- *name = counter->name;
- *desc = counter->desc;
- *offset = counter->offset;
- *data_size = intel_perf_query_counter_get_size(counter);
- *type_enum = intel_counter_type_enum_to_gl_type(counter->type);
- *data_type_enum = intel_counter_data_type_to_gl_type(counter->data_type);
- *raw_max = counter->raw_max;
-}
-
-enum OaReadStatus {
- OA_READ_STATUS_ERROR,
- OA_READ_STATUS_UNFINISHED,
- OA_READ_STATUS_FINISHED,
-};
-
-/******************************************************************************/
-
-/**
- * Driver hook for glBeginPerfQueryINTEL().
- */
-static bool
-brw_begin_perf_query(struct gl_context *ctx,
- struct gl_perf_query_object *o)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_perf_query_object *brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
- struct intel_perf_context *perf_ctx = brw->perf_ctx;
-
- /* We can assume the frontend hides mistaken attempts to Begin a
- * query object multiple times before its End. Similarly if an
- * application reuses a query object before results have arrived
- * the frontend will wait for prior results so we don't need
- * to support abandoning in-flight results.
- */
- assert(!o->Active);
- assert(!o->Used || o->Ready); /* no in-flight query to worry about */
-
- DBG("Begin(%d)\n", o->Id);
-
- bool ret = intel_perf_begin_query(perf_ctx, obj);
-
- if (INTEL_DEBUG(DEBUG_PERFMON))
- dump_perf_queries(brw);
-
- return ret;
-}
-
-/**
- * Driver hook for glEndPerfQueryINTEL().
- */
-static void
-brw_end_perf_query(struct gl_context *ctx,
- struct gl_perf_query_object *o)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_perf_query_object *brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
- struct intel_perf_context *perf_ctx = brw->perf_ctx;
-
- DBG("End(%d)\n", o->Id);
- intel_perf_end_query(perf_ctx, obj);
-}
-
-static void
-brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_perf_query_object *brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
-
- assert(!o->Ready);
-
- intel_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
-}
-
-static bool
-brw_is_perf_query_ready(struct gl_context *ctx,
- struct gl_perf_query_object *o)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_perf_query_object *brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
-
- if (o->Ready)
- return true;
-
- return intel_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
-}
-
-/**
- * Driver hook for glGetPerfQueryDataINTEL().
- */
-static bool
-brw_get_perf_query_data(struct gl_context *ctx,
- struct gl_perf_query_object *o,
- GLsizei data_size,
- GLuint *data,
- GLuint *bytes_written)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_perf_query_object *brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
-
- assert(brw_is_perf_query_ready(ctx, o));
-
- DBG("GetData(%d)\n", o->Id);
-
- if (INTEL_DEBUG(DEBUG_PERFMON))
- dump_perf_queries(brw);
-
- /* We expect that the frontend only calls this hook when it knows
- * that results are available.
- */
- assert(o->Ready);
-
- intel_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
- data_size, data, bytes_written);
-
- return true;
-}
-
-static struct gl_perf_query_object *
-brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_perf_context *perf_ctx = brw->perf_ctx;
- struct intel_perf_query_object * obj = intel_perf_new_query(perf_ctx, query_index);
- if (unlikely(!obj))
- return NULL;
-
- struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object));
- if (unlikely(!brw_query)) {
- intel_perf_delete_query(perf_ctx, obj);
- return NULL;
- }
-
- brw_query->query = obj;
- return &brw_query->base;
-}
-
-/**
- * Driver hook for glDeletePerfQueryINTEL().
- */
-static void
-brw_delete_perf_query(struct gl_context *ctx,
- struct gl_perf_query_object *o)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_perf_query_object *brw_query = brw_perf_query(o);
- struct intel_perf_query_object *obj = brw_query->query;
- struct intel_perf_context *perf_ctx = brw->perf_ctx;
-
- /* We can assume that the frontend waits for a query to complete
- * before ever calling into here, so we don't have to worry about
- * deleting an in-flight query object.
- */
- assert(!o->Active);
- assert(!o->Used || o->Ready);
-
- DBG("Delete(%d)\n", o->Id);
-
- intel_perf_delete_query(perf_ctx, obj);
- free(brw_query);
-}
-
-/******************************************************************************/
-/* intel_device_info will have incorrect default topology values for unsupported
- * kernels. Verify kernel support to ensure OA metrics are accurate.
- */
-static bool
-oa_metrics_kernel_support(int fd, const struct intel_device_info *devinfo)
-{
- if (devinfo->ver >= 10) {
- /* The topology uAPI is required for CNL+ (kernel 4.17+); make a call to
- * the API to verify support
- */
- struct drm_i915_query_item item = {
- .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
- };
- struct drm_i915_query query = {
- .num_items = 1,
- .items_ptr = (uintptr_t) &item,
- };
-
- /* kernel 4.17+ supports the query */
- return drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query) == 0;
- }
-
- if (devinfo->ver >= 8) {
- /* 4.13+ api required for gfx8 - gfx9 */
- int mask;
- struct drm_i915_getparam gp = {
- .param = I915_PARAM_SLICE_MASK,
- .value = &mask,
- };
- /* kernel 4.13+ supports this parameter */
- return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0;
- }
-
- if (devinfo->ver == 7)
- /* default topology values are correct for HSW */
- return true;
-
- /* OA not supported before gen 7 */
- return false;
-}
-
-static void *
-brw_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
-{
- return brw_bo_alloc(bufmgr, name, size, BRW_MEMZONE_OTHER);
-}
-
-static void
-brw_oa_emit_mi_report_perf_count(void *c,
- void *bo,
- uint32_t offset_in_bytes,
- uint32_t report_id)
-{
- struct brw_context *ctx = c;
- ctx->vtbl.emit_mi_report_perf_count(ctx,
- bo,
- offset_in_bytes,
- report_id);
-}
-
-typedef void (*bo_unreference_t)(void *);
-typedef void *(*bo_map_t)(void *, void *, unsigned flags);
-typedef void (*bo_unmap_t)(void *);
-typedef void (* emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
-typedef void (*emit_mi_flush_t)(void *);
-
-static void
-brw_oa_batchbuffer_flush(void *c, const char *file, int line)
-{
- struct brw_context *ctx = c;
- _brw_batch_flush_fence(ctx, -1, NULL, file, line);
-}
-
-static void
-brw_oa_emit_stall_at_pixel_scoreboard(void *c)
-{
- struct brw_context *brw = c;
- brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD);
-}
-
-static void
-brw_perf_store_register(struct brw_context *brw, struct brw_bo *bo,
- uint32_t reg, uint32_t reg_size,
- uint32_t offset)
-{
- if (reg_size == 8) {
- brw_store_register_mem64(brw, bo, reg, offset);
- } else {
- assert(reg_size == 4);
- brw_store_register_mem32(brw, bo, reg, offset);
- }
-}
-
-typedef void (*store_register_mem_t)(void *ctx, void *bo,
- uint32_t reg, uint32_t reg_size,
- uint32_t offset);
-typedef bool (*batch_references_t)(void *batch, void *bo);
-typedef void (*bo_wait_rendering_t)(void *bo);
-typedef int (*bo_busy_t)(void *bo);
-
-static unsigned
-brw_init_perf_query_info(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- struct intel_perf_context *perf_ctx = brw->perf_ctx;
- struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
-
- if (perf_cfg)
- return perf_cfg->n_queries;
-
- if (!oa_metrics_kernel_support(brw->screen->fd, devinfo))
- return 0;
-
- perf_cfg = intel_perf_new(brw->mem_ctx);
-
- perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
- perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
- perf_cfg->vtbl.bo_map = (bo_map_t)brw_bo_map;
- perf_cfg->vtbl.bo_unmap = (bo_unmap_t)brw_bo_unmap;
- perf_cfg->vtbl.emit_stall_at_pixel_scoreboard =
- (emit_mi_flush_t)brw_oa_emit_stall_at_pixel_scoreboard;
- perf_cfg->vtbl.emit_mi_report_perf_count =
- (emit_mi_report_t)brw_oa_emit_mi_report_perf_count;
- perf_cfg->vtbl.batchbuffer_flush = brw_oa_batchbuffer_flush;
- perf_cfg->vtbl.store_register_mem =
- (store_register_mem_t) brw_perf_store_register;
- perf_cfg->vtbl.batch_references = (batch_references_t)brw_batch_references;
- perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering;
- perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy;
-
- intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd,
- true /* pipeline stats */,
- true /* register snapshots */);
- intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr,
- devinfo, brw->hw_ctx, brw->screen->fd);
-
- return perf_cfg->n_queries;
-}
-
-void
-brw_init_performance_queries(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- ctx->Driver.InitPerfQueryInfo = brw_init_perf_query_info;
- ctx->Driver.GetPerfQueryInfo = brw_get_perf_query_info;
- ctx->Driver.GetPerfCounterInfo = brw_get_perf_counter_info;
- ctx->Driver.NewPerfQueryObject = brw_new_perf_query_object;
- ctx->Driver.DeletePerfQuery = brw_delete_perf_query;
- ctx->Driver.BeginPerfQuery = brw_begin_perf_query;
- ctx->Driver.EndPerfQuery = brw_end_perf_query;
- ctx->Driver.WaitPerfQuery = brw_wait_perf_query;
- ctx->Driver.IsPerfQueryReady = brw_is_perf_query_ready;
- ctx->Driver.GetPerfQueryData = brw_get_perf_query_data;
-}
+++ /dev/null
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-#include "brw_fbo.h"
-
-/**
- * Emit a PIPE_CONTROL with various flushing flags.
- *
- * The caller is responsible for deciding what flags are appropriate for the
- * given generation.
- */
-void
-brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver >= 6 &&
- (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
- (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
- /* A pipe control command with flush and invalidate bits set
- * simultaneously is an inherently racy operation on Gfx6+ if the
- * contents of the flushed caches were intended to become visible from
- * any of the invalidated caches. Split it into two PIPE_CONTROLs; the
- * first one should stall the pipeline to make sure that the flushed R/W
- * caches are coherent with memory once the specified R/O caches are
- * invalidated. On pre-Gfx6 hardware the (implicit) R/O cache
- * invalidation seems to happen at the bottom of the pipeline together
- * with any write cache flush, so this shouldn't be a concern. In order
- * to ensure a full stall, we do an end-of-pipe sync.
- */
- brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS));
- flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
- }
-
- brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
-}
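A minimal usage sketch (illustrative only; the flags are defined in brw_pipe_control.h): a single call that sets both flush and invalidate bits, which the helper above splits into an end-of-pipe sync followed by the invalidation on Gfx6+.

brw_emit_pipe_control_flush(brw,
                            PIPE_CONTROL_RENDER_TARGET_FLUSH |
                            PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);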
-
-/**
- * Emit a PIPE_CONTROL that writes to a buffer object.
- *
- * \p flags should contain one of the following items:
- * - PIPE_CONTROL_WRITE_IMMEDIATE
- * - PIPE_CONTROL_WRITE_TIMESTAMP
- * - PIPE_CONTROL_WRITE_DEPTH_COUNT
- */
-void
-brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm)
-{
- brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
-}
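An illustrative call (query_bo and write_offset are hypothetical placeholders): capturing a 64-bit timestamp into a query buffer, per the flag list above.

brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP,
                            query_bo, write_offset,
                            0 /* imm is unused for timestamp writes */);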
-
-/**
- * Restriction [DevSNB, DevIVB]:
- *
- * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
- * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
- * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
- * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
- * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
- * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
- * unless SW can otherwise guarantee that the pipeline from WM onwards is
- * already flushed (e.g., via a preceding MI_FLUSH).
- */
-void
-brw_emit_depth_stall_flushes(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver >= 6);
-
- /* Starting with BDW, these pipe controls are unnecessary.
- *
- * WM HW will internally manage the draining pipe and flushing of the caches
- * when this command is issued. The PIPE_CONTROL restrictions are removed.
- */
- if (devinfo->ver >= 8)
- return;
-
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-}
-
-/**
- * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
- * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
- * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
- * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
- * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
- * to be sent before any combination of VS associated 3DSTATE."
- */
-void
-gfx7_emit_vs_workaround_flush(struct brw_context *brw)
-{
- ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver == 7);
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_WRITE_IMMEDIATE
- | PIPE_CONTROL_DEPTH_STALL,
- brw->workaround_bo,
- brw->workaround_bo_offset, 0);
-}
-
-/**
- * From the PRM, Volume 2a:
- *
- * "Indirect State Pointers Disable
- *
- * At the completion of the post-sync operation associated with this pipe
- * control packet, the indirect state pointers in the hardware are
- * considered invalid; the indirect pointers are not saved in the context.
- * If any new indirect state commands are executed in the command stream
- * while the pipe control is pending, the new indirect state commands are
- * preserved.
- *
- * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
- * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
- * commands are only considered as Indirect State Pointers. Once ISP is
- * issued in a context, SW must initialize by programming push constant
- * commands for all the shaders (at least to zero length) before attempting
- * any rendering operation for the same context."
- *
- * 3DSTATE_CONSTANT_* packets are restored during a context restore,
- * even though they point to a BO that has been already unreferenced at
- * the end of the previous batch buffer. This has been fine so far since
- * we are protected by the scratch page (every address not covered by
- * a BO should be pointing to the scratch page). But on CNL, it is
- * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
- * instruction.
- *
- * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
- * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
- * context restore, so the mentioned hang doesn't happen. However,
- * software must program push constant commands for all stages prior to
- * rendering anything, so we flag them as dirty.
- *
- * Finally, we also stall at the pixel scoreboard to make sure the
- * constants have been loaded into the EUs prior to disabling the push
- * constants, so that we don't hang a previous 3DPRIMITIVE.
- */
-void
-gfx7_emit_isp_disable(struct brw_context *brw)
-{
- brw->vtbl.emit_raw_pipe_control(brw,
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_CS_STALL,
- NULL, 0, 0);
- brw->vtbl.emit_raw_pipe_control(brw,
- PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
- PIPE_CONTROL_CS_STALL,
- NULL, 0, 0);
-
- brw->vs.base.push_constants_dirty = true;
- brw->tcs.base.push_constants_dirty = true;
- brw->tes.base.push_constants_dirty = true;
- brw->gs.base.push_constants_dirty = true;
- brw->wm.base.push_constants_dirty = true;
-}
-
-/**
- * Emit a PIPE_CONTROL command for gfx7 with the CS Stall bit set.
- */
-void
-gfx7_emit_cs_stall_flush(struct brw_context *brw)
-{
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_CS_STALL
- | PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->workaround_bo,
- brw->workaround_bo_offset, 0);
-}
-
-/**
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gfx6. From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- * "1 of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- * - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it. Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either. Notify enable is IRQs, which aren't
- * really our business. That leaves only stall at scoreboard.
- */
-void
-brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
-{
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
-
- brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->workaround_bo,
- brw->workaround_bo_offset, 0);
-}
-
-/*
- * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
- *
- * Write synchronization is a special case of end-of-pipe
- * synchronization that requires that the render cache and/or depth
- * related caches are flushed to memory, where the data will become
- * globally visible. This type of synchronization is required prior to
- * SW (CPU) actually reading the result data from memory, or initiating
- * an operation that will use as a read surface (such as a texture
- * surface) a previous render target and/or depth/stencil buffer
- *
- *
- * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
- *
- * Exercising the write cache flush bits (Render Target Cache Flush
- * Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
- * ensures the write caches are flushed and doesn't guarantee the data
- * is globally visible.
- *
- * SW can track the completion of the end-of-pipe-synchronization by
- * using "Notify Enable" and "PostSync Operation - Write Immediate
- * Data" in the PIPE_CONTROL command.
- */
-void
-brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver >= 6) {
- /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
- *
- * "The most common action to perform upon reaching a synchronization
- * point is to write a value out to memory. An immediate value
- * (included with the synchronization command) may be written."
- *
- *
- * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
- *
- * "In case the data flushed out by the render engine is to be read
- * back in to the render engine in coherent manner, then the render
- * engine has to wait for the fence completion before accessing the
- * flushed data. This can be achieved by following means on various
- * products: PIPE_CONTROL command with CS Stall and the required
- * write caches flushed with Post-Sync-Operation as Write Immediate
- * Data.
- *
- * Example:
- * - Workload-1 (3D/GPGPU/MEDIA)
- * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
- * Data, Required Write Cache Flush bits set)
- * - Workload-2 (Can use the data produce or output by Workload-1)
- */
- brw_emit_pipe_control_write(brw,
- flags | PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->workaround_bo,
- brw->workaround_bo_offset, 0);
-
- if (devinfo->platform == INTEL_PLATFORM_HSW) {
- /* Haswell needs additional workarounds:
- *
- * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
- *
- * Option 1:
- * PIPE_CONTROL command with the CS Stall and the required write
- * caches flushed with Post-SyncOperation as Write Immediate Data
- * followed by eight dummy MI_STORE_DATA_IMM (write to scratch
- * space) commands.
- *
- * Example:
- * - Workload-1
- * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
- * Immediate Data, Required Write Cache Flush bits set)
- * - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
- * - Workload-2 (Can use the data produce or output by
- * Workload-1)
- *
- * Unfortunately, both the PRMs and the internal docs are a bit
- * out-of-date in this regard. What the Windows driver does (and
- * this appears to actually work) is to emit a register read from the
- * memory address written by the pipe control above.
- *
- * What register we load into doesn't matter. We choose an indirect
- * rendering register because we know it always exists and it's one
- * of the first registers the command parser allows us to write. If
- * you don't have command parser support in your kernel (pre-4.2),
- * this will get turned into MI_NOOP and you won't get the
- * workaround. Unfortunately, there's just not much we can do in
- * that case. This register is perfectly safe to write since we
- * always re-load all of the indirect draw registers right before
- * 3DPRIMITIVE when needed anyway.
- */
- brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE,
- brw->workaround_bo, brw->workaround_bo_offset);
- }
- } else {
- /* On gfx4-5, a regular pipe control seems to suffice. */
- brw_emit_pipe_control_flush(brw, flags);
- }
-}
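A sketch of the CPU-readback pattern the PRM excerpts above describe, using helpers that appear elsewhere in this driver (result_bo is a hypothetical buffer holding render results):

brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
brw_batch_flush(brw);                               /* submit the batch */
void *map = brw_bo_map(brw, result_bo, MAP_READ);   /* data now coherent */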
-
-/* Emit a pipelined flush to either flush render and texture cache for
- * reading from a FBO-drawn texture, or flush so that frontbuffer
- * render appears on the screen in DRI1.
- *
- * This is also used for the always_flush_cache driconf debug option.
- */
-void
-brw_emit_mi_flush(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
- if (devinfo->ver >= 6) {
- flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE |
- PIPE_CONTROL_DATA_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_VF_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CS_STALL;
- }
- brw_emit_pipe_control_flush(brw, flags);
-}
-
-static bool
-init_identifier_bo(struct brw_context *brw)
-{
- void *bo_map;
-
- if (!can_do_exec_capture(brw->screen))
- return true;
-
- bo_map = brw_bo_map(NULL, brw->workaround_bo, MAP_READ | MAP_WRITE);
- if (!bo_map)
- return false;
-
- brw->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
- brw->workaround_bo_offset =
- ALIGN(intel_debug_write_identifiers(bo_map, 4096, "i965") + 8, 8);
-
- brw_bo_unmap(brw->workaround_bo);
-
- return true;
-}
-
-int
-brw_init_pipe_control(struct brw_context *brw,
- const struct intel_device_info *devinfo)
-{
- switch (devinfo->ver) {
- case 11:
- brw->vtbl.emit_raw_pipe_control = gfx11_emit_raw_pipe_control;
- break;
- case 9:
- brw->vtbl.emit_raw_pipe_control = gfx9_emit_raw_pipe_control;
- break;
- case 8:
- brw->vtbl.emit_raw_pipe_control = gfx8_emit_raw_pipe_control;
- break;
- case 7:
- brw->vtbl.emit_raw_pipe_control =
- devinfo->verx10 == 75 ?
- gfx75_emit_raw_pipe_control : gfx7_emit_raw_pipe_control;
- break;
- case 6:
- brw->vtbl.emit_raw_pipe_control = gfx6_emit_raw_pipe_control;
- break;
- case 5:
- brw->vtbl.emit_raw_pipe_control = gfx5_emit_raw_pipe_control;
- break;
- case 4:
- brw->vtbl.emit_raw_pipe_control =
- devinfo->verx10 == 45 ?
- gfx45_emit_raw_pipe_control : gfx4_emit_raw_pipe_control;
- break;
- default:
- unreachable("Unhandled Gen.");
- }
-
- if (devinfo->ver < 6)
- return 0;
-
- /* We can't just use brw_state_batch to get a chunk of space for
- * the gfx6 workaround because it involves actually writing to
- * the buffer, and the kernel doesn't let us write to the batch.
- */
- brw->workaround_bo = brw_bo_alloc(brw->bufmgr, "workaround", 4096,
- BRW_MEMZONE_OTHER);
- if (brw->workaround_bo == NULL)
- return -ENOMEM;
-
- if (!init_identifier_bo(brw))
- return -ENOMEM; /* Couldn't map workaround_bo?? */
-
- brw->workaround_bo_offset = 0;
- brw->pipe_controls_since_last_cs_stall = 0;
-
- return 0;
-}
-
-void
-brw_fini_pipe_control(struct brw_context *brw)
-{
- brw_bo_unreference(brw->workaround_bo);
-}
+++ /dev/null
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_PIPE_CONTROL_DOT_H
-#define BRW_PIPE_CONTROL_DOT_H
-
-struct brw_context;
-struct intel_device_info;
-struct brw_bo;
-
-/** @{
- *
- * PIPE_CONTROL operation, a combination of MI_FLUSH and a register write with
- * additional flushing control.
- *
- * The bits here are not the actual hardware values. The actual values
- * shift around a bit per-generation, so we just have flags for each
- * potential operation, and use genxml to encode the actual packet.
- */
-enum pipe_control_flags
-{
- PIPE_CONTROL_FLUSH_LLC = (1 << 1),
- PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2),
- PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3),
- PIPE_CONTROL_CS_STALL = (1 << 4),
- PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5),
- PIPE_CONTROL_SYNC_GFDT = (1 << 6),
- PIPE_CONTROL_TLB_INVALIDATE = (1 << 7),
- PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8),
- PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9),
- PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10),
- PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11),
- PIPE_CONTROL_DEPTH_STALL = (1 << 12),
- PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
- PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14),
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15),
- PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
- PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17),
- PIPE_CONTROL_FLUSH_ENABLE = (1 << 18),
- PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19),
- PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20),
- PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21),
- PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22),
- PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23),
- PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24),
-};
-
-#define PIPE_CONTROL_CACHE_FLUSH_BITS \
- (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
- PIPE_CONTROL_RENDER_TARGET_FLUSH)
-
-#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
- (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
- PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
- PIPE_CONTROL_INSTRUCTION_INVALIDATE)
-
-/** @} */
-
-int brw_init_pipe_control(struct brw_context *brw,
- const struct intel_device_info *info);
-void brw_fini_pipe_control(struct brw_context *brw);
-
-void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
-void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags);
-void brw_emit_mi_flush(struct brw_context *brw);
-void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
-void brw_emit_depth_stall_flushes(struct brw_context *brw);
-void gfx7_emit_vs_workaround_flush(struct brw_context *brw);
-void gfx7_emit_cs_stall_flush(struct brw_context *brw);
-void gfx7_emit_isp_disable(struct brw_context *brw);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/accum.h"
-#include "main/enums.h"
-#include "main/state.h"
-#include "main/stencil.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "swrast/swrast.h"
-
-#include "brw_context.h"
-#include "brw_pixel.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-static GLenum
-effective_func(GLenum func, bool src_alpha_is_one)
-{
- if (src_alpha_is_one) {
- if (func == GL_SRC_ALPHA)
- return GL_ONE;
- if (func == GL_ONE_MINUS_SRC_ALPHA)
- return GL_ZERO;
- }
-
- return func;
-}
-
-/**
- * Check if any fragment operations are in effect which might affect
- * glDraw/CopyPixels.
- */
-bool
-brw_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
-{
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- if (_mesa_arb_fragment_program_enabled(ctx)) {
- DBG("fallback due to fragment program\n");
- return false;
- }
-
- if (ctx->Color.BlendEnabled &&
- (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE ||
- effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO ||
- ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD ||
- effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE ||
- effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO ||
- ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) {
- DBG("fallback due to blend\n");
- return false;
- }
-
- if (ctx->Texture._MaxEnabledTexImageUnit != -1) {
- DBG("fallback due to texturing\n");
- return false;
- }
-
- if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) {
- DBG("fallback due to color masking\n");
- return false;
- }
-
- if (ctx->Color.AlphaEnabled) {
- DBG("fallback due to alpha\n");
- return false;
- }
-
- if (ctx->Depth.Test) {
- DBG("fallback due to depth test\n");
- return false;
- }
-
- if (ctx->Fog.Enabled) {
- DBG("fallback due to fog\n");
- return false;
- }
-
- if (ctx->_ImageTransferState) {
- DBG("fallback due to image transfer\n");
- return false;
- }
-
- if (_mesa_stencil_is_enabled(ctx)) {
- DBG("fallback due to image stencil\n");
- return false;
- }
-
- if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
- DBG("fallback due to pixel zoom\n");
- return false;
- }
-
- if (ctx->RenderMode != GL_RENDER) {
- DBG("fallback due to render mode\n");
- return false;
- }
-
- return true;
-}
-
-void
-brw_init_pixel_functions(struct dd_function_table *functions)
-{
- functions->Bitmap = brw_bitmap;
- functions->CopyPixels = brw_copypixels;
- functions->DrawPixels = brw_drawpixels;
- functions->ReadPixels = brw_readpixels;
-}
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_PIXEL_H
-#define BRW_PIXEL_H
-
-#include "main/mtypes.h"
-
-void brw_init_pixel_functions(struct dd_function_table *functions);
-bool brw_check_blit_fragment_ops(struct gl_context *ctx,
- bool src_alpha_is_one);
-
-void brw_readpixels(struct gl_context *ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *pack,
- GLvoid *pixels);
-
-void brw_drawpixels(struct gl_context *ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format,
- GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels);
-
-void brw_copypixels(struct gl_context *ctx,
- GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint destx, GLint desty, GLenum type);
-
-void brw_bitmap(struct gl_context *ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *pixels);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/blend.h"
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/colormac.h"
-#include "main/condrender.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/pbo.h"
-#include "main/bufferobj.h"
-#include "main/state.h"
-#include "main/texobj.h"
-#include "main/context.h"
-#include "main/fbobject.h"
-#include "swrast/swrast.h"
-#include "drivers/common/meta.h"
-
-#include "brw_context.h"
-#include "brw_screen.h"
-#include "brw_batch.h"
-#include "brw_blit.h"
-#include "brw_fbo.h"
-#include "brw_image.h"
-#include "brw_buffers.h"
-#include "brw_pixel.h"
-
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-
-/* Unlike the other intel_pixel_* functions, the expectation here is
- * that the incoming data is not in a PBO. With the XY_TEXT blit
- * method, there's no benefit having it in a PBO, but we could
- * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
- * PBO bitmaps. I think they are probably pretty rare though - I
- * wonder if Xgl uses them?
- */
-static const GLubyte *
-map_pbo(struct gl_context *ctx,
- GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap)
-{
- GLubyte *buf;
-
- if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
- GL_COLOR_INDEX, GL_BITMAP,
- INT_MAX, (const GLvoid *) bitmap)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
- return NULL;
- }
-
- buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
- GL_MAP_READ_BIT,
- unpack->BufferObj,
- MAP_INTERNAL);
- if (!buf) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
- return NULL;
- }
-
- return ADD_POINTERS(buf, bitmap);
-}
-
-static bool test_bit( const GLubyte *src, GLuint bit )
-{
- return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
-}
-
-static void set_bit( GLubyte *dest, GLuint bit )
-{
- dest[bit/8] |= 1 << (bit % 8);
-}
-
-/* Extract a rectangle's worth of data from the bitmap. Called
- * per chunk of HW-sized bitmap.
- */
-static GLuint
-get_bitmap_rect(GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap,
- GLuint x, GLuint y,
- GLuint w, GLuint h,
- GLubyte *dest,
- GLuint row_align,
- bool invert)
-{
- GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
- GLuint mask = unpack->LsbFirst ? 0 : 7;
- GLuint bit = 0;
- GLint row, col;
- GLint first, last;
- GLint incr;
- GLuint count = 0;
-
- DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
- __func__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
-
- if (invert) {
- first = h-1;
- last = 0;
- incr = -1;
- }
- else {
- first = 0;
- last = h-1;
- incr = 1;
- }
-
- /* Require that dest be pre-zero'd.
- */
- for (row = first; row != (last+incr); row += incr) {
- const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap,
- width, height,
- GL_COLOR_INDEX, GL_BITMAP,
- y + row, x);
-
- for (col = 0; col < w; col++, bit++) {
- if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
- set_bit(dest, bit ^ 7);
- count++;
- }
- }
-
- if (row_align)
- bit = ALIGN(bit, row_align);
- }
-
- return count;
-}
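An illustrative call (hypothetical chunk origin and size) matching how do_blit_bitmap() below uses this helper: extract one 32x32 chunk into a zeroed stipple buffer with rows padded to 8 bits.

GLuint stipple[32] = { 0 };   /* dest must be pre-zeroed */
GLuint lit = get_bitmap_rect(width, height, unpack, bitmap,
                             0, 0,                  /* chunk origin (x, y) */
                             32, 32,                /* chunk size  (w, h) */
                             (GLubyte *) stipple,
                             8,                     /* row_align */
                             false);                /* invert */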
-
-/**
- * Returns the low Y value of the vertical range given, flipped according to
- * whether the framebuffer is flipped or not.
- */
-static inline int
-y_flip(struct gl_framebuffer *fb, int y, int height)
-{
- if (fb->FlipY)
- return fb->Height - y - height;
- else
- return y;
-}
-
-/*
- * Render a bitmap.
- */
-static bool
-do_blit_bitmap(struct gl_context *ctx,
- GLint dstx, GLint dsty,
- GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- struct brw_renderbuffer *irb;
- GLfloat tmpColor[4];
- GLubyte ubcolor[4];
- GLuint color;
- GLsizei bitmap_width = width;
- GLsizei bitmap_height = height;
- GLint px, py;
- GLuint stipple[32];
- GLint orig_dstx = dstx;
- GLint orig_dsty = dsty;
-
- /* Update draw buffer bounds */
- _mesa_update_state(ctx);
-
- if (ctx->Depth.Test) {
- /* The blit path produces incorrect results when depth testing is on.
- * It seems the blit Z coord is always 1.0 (the far plane) so fragments
- * will likely be obscured by other, closer geometry.
- */
- return false;
- }
-
- brw_prepare_render(brw);
-
- if (fb->_NumColorDrawBuffers != 1) {
- perf_debug("accelerated glBitmap() only supports rendering to a "
- "single color buffer\n");
- return false;
- }
-
- irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]);
-
- if (unpack->BufferObj) {
- bitmap = map_pbo(ctx, width, height, unpack, bitmap);
- if (bitmap == NULL)
- return true; /* even though this is an error, we're done */
- }
-
- COPY_4V(tmpColor, ctx->Current.RasterColor);
-
- if (_mesa_need_secondary_color(ctx)) {
- ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
- }
-
- UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
-
- switch (_mesa_get_render_format(ctx, brw_rb_format(irb))) {
- case MESA_FORMAT_B8G8R8A8_UNORM:
- case MESA_FORMAT_B8G8R8X8_UNORM:
- color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
- break;
- case MESA_FORMAT_B5G6R5_UNORM:
- color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
- break;
- default:
- perf_debug("Unsupported format %s in accelerated glBitmap()\n",
- _mesa_get_format_name(irb->mt->format));
- return false;
- }
-
- if (!brw_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
- return false;
-
- /* Clip to buffer bounds and scissor. */
- if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
- fb->_Xmax, fb->_Ymax,
- &dstx, &dsty, &width, &height))
- goto out;
-
- dsty = y_flip(fb, dsty, height);
-
-#define DY 32
-#define DX 32
-
- /* The blitter has no idea about fast color clears, so we need to resolve
- * the miptree before we do anything.
- */
- brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, true);
-
- /* Chop it all into chunks that can be digested by hardware: */
- for (py = 0; py < height; py += DY) {
- for (px = 0; px < width; px += DX) {
- int h = MIN2(DY, height - py);
- int w = MIN2(DX, width - px);
- GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
- const enum gl_logicop_mode logic_op = ctx->Color.ColorLogicOpEnabled ?
- ctx->Color._LogicOp : COLOR_LOGICOP_COPY;
-
- assert(sz <= sizeof(stipple));
- memset(stipple, 0, sz);
-
- /* May need to adjust this when padding has been introduced in
- * sz above:
- *
- * Have to translate destination coordinates back into source
- * coordinates.
- */
- int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack,
- bitmap,
- -orig_dstx + (dstx + px),
- -orig_dsty + y_flip(fb, dsty + py, h),
- w, h,
- (GLubyte *)stipple,
- 8,
- fb->FlipY);
- if (count == 0)
- continue;
-
- if (!brw_emit_immediate_color_expand_blit(brw,
- irb->mt->cpp,
- (GLubyte *)stipple,
- sz,
- color,
- irb->mt->surf.row_pitch_B,
- irb->mt->bo,
- irb->mt->offset,
- irb->mt->surf.tiling,
- dstx + px,
- dsty + py,
- w, h,
- logic_op)) {
- return false;
- }
-
- if (ctx->Query.CurrentOcclusionObject)
- ctx->Query.CurrentOcclusionObject->Result += count;
- }
- }
-out:
-
- if (INTEL_DEBUG(DEBUG_SYNC))
- brw_batch_flush(brw);
-
- if (unpack->BufferObj) {
- /* done with PBO so unmap it now */
- ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj, MAP_INTERNAL);
- }
-
- return true;
-}
-
-
-/* There are a large number of possible ways to implement bitmap on
- * this hardware, most of them have some sort of drawback. Here are a
- * few that spring to mind:
- *
- * Blit:
- * - XY_MONO_SRC_BLT_CMD
- * - use XY_SETUP_CLIP_BLT for cliprect clipping.
- * - XY_TEXT_BLT
- * - XY_TEXT_IMMEDIATE_BLT
- * - blit per cliprect, subject to maximum immediate data size.
- * - XY_COLOR_BLT
- * - per pixel or run of pixels
- * - XY_PIXEL_BLT
- * - good for sparse bitmaps
- *
- * 3D engine:
- * - Point per pixel
- * - Translate bitmap to an alpha texture and render as a quad
- * - Chop bitmap up into 32x32 squares and render w/polygon stipple.
- */
-void
-brw_bitmap(struct gl_context * ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte * pixels)
-{
- struct brw_context *brw = brw_context(ctx);
-
- if (!_mesa_check_conditional_render(ctx))
- return;
-
- if (brw->screen->devinfo.ver < 6 &&
- do_blit_bitmap(ctx, x, y, width, height, unpack, pixels))
- return;
-
- _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels);
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/image.h"
-#include "main/state.h"
-#include "main/stencil.h"
-#include "main/mtypes.h"
-#include "main/condrender.h"
-#include "main/fbobject.h"
-#include "drivers/common/meta.h"
-
-#include "brw_context.h"
-#include "brw_buffers.h"
-#include "brw_mipmap_tree.h"
-#include "brw_pixel.h"
-#include "brw_fbo.h"
-#include "brw_blit.h"
-#include "brw_batch.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-/**
- * CopyPixels with the blitter.  Does not support zooming, pixel transfer, etc.
- */
-static bool
-do_blit_copypixels(struct gl_context * ctx,
- GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty, GLenum type)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- struct gl_framebuffer *read_fb = ctx->ReadBuffer;
- GLint orig_dstx;
- GLint orig_dsty;
- GLint orig_srcx;
- GLint orig_srcy;
- struct brw_renderbuffer *draw_irb = NULL;
- struct brw_renderbuffer *read_irb = NULL;
-
- /* Update draw buffer bounds */
- _mesa_update_state(ctx);
-
- brw_prepare_render(brw);
-
- switch (type) {
- case GL_COLOR:
- if (fb->_NumColorDrawBuffers != 1) {
- perf_debug("glCopyPixels() fallback: MRT\n");
- return false;
- }
-
- draw_irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]);
- read_irb = brw_renderbuffer(read_fb->_ColorReadBuffer);
- break;
- case GL_DEPTH_STENCIL_EXT:
- draw_irb = brw_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
- read_irb =
- brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
- break;
- case GL_DEPTH:
- perf_debug("glCopyPixels() fallback: GL_DEPTH\n");
- return false;
- case GL_STENCIL:
- perf_debug("glCopyPixels() fallback: GL_STENCIL\n");
- return false;
- default:
- perf_debug("glCopyPixels(): Unknown type\n");
- return false;
- }
-
- if (!draw_irb) {
- perf_debug("glCopyPixels() fallback: missing draw buffer\n");
- return false;
- }
-
- if (!read_irb) {
- perf_debug("glCopyPixels() fallback: missing read buffer\n");
- return false;
- }
-
- if (draw_irb->mt->surf.samples > 1 || read_irb->mt->surf.samples > 1) {
- perf_debug("glCopyPixels() fallback: multisampled buffers\n");
- return false;
- }
-
- if (ctx->_ImageTransferState) {
- perf_debug("glCopyPixels(): Unsupported image transfer state\n");
- return false;
- }
-
- if (ctx->Depth.Test) {
- perf_debug("glCopyPixels(): Unsupported depth test state\n");
- return false;
- }
-
- if (brw->stencil_enabled) {
- perf_debug("glCopyPixels(): Unsupported stencil test state\n");
- return false;
- }
-
- if (ctx->Fog.Enabled ||
- ctx->Texture._MaxEnabledTexImageUnit != -1 ||
- _mesa_arb_fragment_program_enabled(ctx)) {
- perf_debug("glCopyPixels(): Unsupported fragment shader state\n");
- return false;
- }
-
- if (ctx->Color.AlphaEnabled ||
- ctx->Color.BlendEnabled) {
- perf_debug("glCopyPixels(): Unsupported blend state\n");
- return false;
- }
-
- if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) {
- perf_debug("glCopyPixels(): Unsupported color mask state\n");
- return false;
- }
-
- if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
- perf_debug("glCopyPixels(): Unsupported pixel zoom\n");
- return false;
- }
-
- brw_batch_flush(brw);
-
- /* Clip to destination buffer. */
- orig_dstx = dstx;
- orig_dsty = dsty;
- if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
- fb->_Xmax, fb->_Ymax,
- &dstx, &dsty, &width, &height))
- goto out;
- /* Adjust src coords for our post-clipped destination origin */
- srcx += dstx - orig_dstx;
- srcy += dsty - orig_dsty;
-
- /* Clip to source buffer. */
- orig_srcx = srcx;
- orig_srcy = srcy;
- if (!_mesa_clip_to_region(0, 0,
- read_fb->Width, read_fb->Height,
- &srcx, &srcy, &width, &height))
- goto out;
- /* Adjust dst coords for our post-clipped source origin */
- dstx += srcx - orig_srcx;
- dsty += srcy - orig_srcy;
-
- if (!brw_miptree_blit(brw,
- read_irb->mt, read_irb->mt_level, read_irb->mt_layer,
- srcx, srcy, read_fb->FlipY,
- draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer,
- dstx, dsty, fb->FlipY,
- width, height,
- (ctx->Color.ColorLogicOpEnabled ?
- ctx->Color._LogicOp : COLOR_LOGICOP_COPY))) {
- DBG("%s: blit failure\n", __func__);
- return false;
- }
-
- if (ctx->Query.CurrentOcclusionObject)
- ctx->Query.CurrentOcclusionObject->Result += width * height;
-
-out:
-
- DBG("%s: success\n", __func__);
- return true;
-}
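
A minimal sketch of the two-pass clipping used by do_blit_copypixels() above, with made-up buffer widths and a simplified one-axis helper (example_clip_axis is illustrative only, not a Mesa function): clip the destination and shift the source by the same amount, then clip the source and shift the destination back.

   #include <stdio.h>

   /* Clamp [*x, *x + *len) to [lo, hi) and report how far the start moved. */
   static int example_clip_axis(int lo, int hi, int *x, int *len)
   {
      int orig = *x;
      if (*x < lo) { *len -= lo - *x; *x = lo; }
      if (*x + *len > hi) *len = hi - *x;
      return *x - orig;
   }

   int main(void)
   {
      int srcx = -4, dstx = 10, width = 20;

      /* Clip the destination to a 16-pixel-wide draw buffer... */
      int shift = example_clip_axis(0, 16, &dstx, &width);
      srcx += shift;                       /* keep src and dst in step */

      /* ...then clip the source to a 32-pixel-wide read buffer. */
      shift = example_clip_axis(0, 32, &srcx, &width);
      dstx += shift;

      printf("srcx=%d dstx=%d width=%d\n", srcx, dstx, width);  /* 0, 14, 2 */
      return 0;
   }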
-
-
-void
-brw_copypixels(struct gl_context *ctx,
- GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint destx, GLint desty, GLenum type)
-{
- struct brw_context *brw = brw_context(ctx);
-
- DBG("%s\n", __func__);
-
- if (!_mesa_check_conditional_render(ctx))
- return;
-
- if (brw->screen->devinfo.ver < 6 &&
- do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
- return;
-
- /* this will use swrast if needed */
- _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
-}
+++ /dev/null
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/glformats.h"
-#include "main/mtypes.h"
-#include "main/condrender.h"
-#include "main/fbobject.h"
-#include "main/teximage.h"
-#include "main/texobj.h"
-#include "main/texstate.h"
-#include "main/bufferobj.h"
-#include "swrast/swrast.h"
-#include "drivers/common/meta.h"
-
-#include "brw_context.h"
-#include "brw_screen.h"
-#include "brw_blit.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-#include "brw_pixel.h"
-#include "brw_buffer_objects.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-static bool
-do_blit_drawpixels(struct gl_context * ctx,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid * pixels)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_buffer_object *src = brw_buffer_object(unpack->BufferObj);
- GLuint src_offset;
- struct brw_bo *src_buffer;
-
- DBG("%s\n", __func__);
-
- if (!brw_check_blit_fragment_ops(ctx, false))
- return false;
-
- if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
- DBG("%s: fallback due to MRT\n", __func__);
- return false;
- }
-
- brw_prepare_render(brw);
-
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
- mesa_format src_format = _mesa_format_from_format_and_type(format, type);
- if (_mesa_format_is_mesa_array_format(src_format))
- src_format = _mesa_format_from_array_format(src_format);
- mesa_format dst_format = irb->mt->format;
-
- /* We can safely discard sRGB encode/decode for the DrawPixels interface */
- src_format = _mesa_get_srgb_format_linear(src_format);
- dst_format = _mesa_get_srgb_format_linear(dst_format);
-
- if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) {
- DBG("%s: bad format for blit\n", __func__);
- return false;
- }
-
- if (unpack->SwapBytes || unpack->LsbFirst ||
- unpack->SkipPixels || unpack->SkipRows) {
- DBG("%s: bad packing params\n", __func__);
- return false;
- }
-
- int src_stride = _mesa_image_row_stride(unpack, width, format, type);
- bool src_flip = false;
- /* Mesa flips the src_stride for unpack->Invert, but we want our mt to have
- * a normal src_stride.
- */
- if (unpack->Invert) {
- src_stride = -src_stride;
- src_flip = true;
- }
-
- src_offset = (GLintptr)pixels;
- src_offset += _mesa_image_offset(2, unpack, width, height,
- format, type, 0, 0, 0);
-
- src_buffer = brw_bufferobj_buffer(brw, src, src_offset,
- height * src_stride, false);
-
- struct brw_mipmap_tree *pbo_mt =
- brw_miptree_create_for_bo(brw,
- src_buffer,
- irb->mt->format,
- src_offset,
- width, height, 1,
- src_stride,
- ISL_TILING_LINEAR,
- MIPTREE_CREATE_DEFAULT);
- if (!pbo_mt)
- return false;
-
- if (!brw_miptree_blit(brw,
- pbo_mt, 0, 0,
- 0, 0, src_flip,
- irb->mt, irb->mt_level, irb->mt_layer,
- x, y, ctx->DrawBuffer->FlipY,
- width, height, COLOR_LOGICOP_COPY)) {
- DBG("%s: blit failed\n", __func__);
- brw_miptree_release(&pbo_mt);
- return false;
- }
-
- brw_miptree_release(&pbo_mt);
-
- if (ctx->Query.CurrentOcclusionObject)
- ctx->Query.CurrentOcclusionObject->Result += width * height;
-
- DBG("%s: success\n", __func__);
- return true;
-}
-
-void
-brw_drawpixels(struct gl_context *ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format,
- GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
-{
- struct brw_context *brw = brw_context(ctx);
-
- if (!_mesa_check_conditional_render(ctx))
- return;
-
- if (format == GL_STENCIL_INDEX) {
- _swrast_DrawPixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
- return;
- }
-
- if (brw->screen->devinfo.ver < 6 &&
- unpack->BufferObj) {
- if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack,
- pixels)) {
- return;
- }
-
- perf_debug("%s: fallback to generic code in PBO case\n", __func__);
- }
-
- _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/enums.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/fbobject.h"
-#include "main/image.h"
-#include "main/bufferobj.h"
-#include "main/readpix.h"
-#include "main/state.h"
-#include "main/glformats.h"
-#include "program/prog_instruction.h"
-#include "drivers/common/meta.h"
-
-#include "brw_context.h"
-#include "brw_blorp.h"
-#include "brw_screen.h"
-#include "brw_batch.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-#include "brw_pixel.h"
-#include "brw_buffer_objects.h"
-
-#define FILE_DEBUG_FLAG DEBUG_PIXEL
-
-/**
- * \brief A fast path for glReadPixels
- *
- * This fast path is taken when the source format is BGRA, RGBA,
- * A or L and when the texture memory is X- or Y-tiled. It downloads
- * the source data by directly mapping the memory without a GTT fence.
- * This then needs to be de-tiled on the CPU before presenting the data to
- * the user in linear fashion.
- *
- * This is a performance win over the conventional texture download path.
- * In the conventional texture download path, the texture is either mapped
- * through the GTT or copied to a linear buffer with the blitter before
- * handing off to a software path. This allows us to avoid round-tripping
- * through the GPU (in the case where we would be blitting) and do only a
- * single copy operation.
- */
-static bool
-brw_readpixels_tiled_memcpy(struct gl_context *ctx,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- GLvoid * pixels,
- const struct gl_pixelstore_attrib *pack)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* This path supports reading from color buffers only */
- if (rb == NULL)
- return false;
-
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- int dst_pitch;
-
- /* The miptree's buffer. */
- struct brw_bo *bo;
-
- uint32_t cpp;
- isl_memcpy_type copy_type;
-
- /* This fastpath is restricted to specific renderbuffer types:
- * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
- * more types.
- */
- if (!devinfo->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- pixels == NULL ||
- pack->BufferObj ||
- pack->Alignment > 4 ||
- pack->SkipPixels > 0 ||
- pack->SkipRows > 0 ||
- (pack->RowLength != 0 && pack->RowLength != width) ||
- pack->SwapBytes ||
- pack->LsbFirst ||
- pack->Invert)
- return false;
-
- /* Only a simple blit, no scale, bias or other mapping. */
- if (ctx->_ImageTransferState)
- return false;
-
- /* It is possible that the renderbuffer (or underlying texture) is
- * multisampled. Since ReadPixels from a multisampled buffer requires a
- * multisample resolve, we can't handle this here
- */
- if (rb->NumSamples > 1)
- return false;
-
- /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
- * function doesn't set the last channel to 1. Note this checks BaseFormat
- * rather than TexFormat in case the RGBX format is being simulated with an
- * RGBA format.
- */
- if (rb->_BaseFormat == GL_RGB)
- return false;
-
- copy_type = brw_miptree_get_memcpy_type(rb->Format, format, type, &cpp);
- if (copy_type == ISL_MEMCPY_INVALID)
- return false;
-
- if (!irb->mt ||
- (irb->mt->surf.tiling != ISL_TILING_X &&
- irb->mt->surf.tiling != ISL_TILING_Y0)) {
- /* The algorithm is written only for X- or Y-tiled memory. */
- return false;
- }
-
- /* tiled_to_linear() assumes that if the object is swizzled, it is using
- * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
- * true on gfx5 and above.
- *
- * The killer on top is that some gfx4 have an L-shaped swizzle mode, where
- * parts of the memory aren't swizzled at all. Userspace just can't handle
- * that.
- */
- if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
- return false;
-
- /* Since we are going to read raw data to the miptree, we need to resolve
- * any pending fast color clears before we start.
- */
- brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false);
-
- bo = irb->mt->bo;
-
- if (brw_batch_references(&brw->batch, bo)) {
- perf_debug("Flushing before mapping a referenced bo.\n");
- brw_batch_flush(brw);
- }
-
- void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
- if (map == NULL) {
- DBG("%s: failed to map bo\n", __func__);
- return false;
- }
-
- unsigned slice_offset_x, slice_offset_y;
- brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer,
- &slice_offset_x, &slice_offset_y);
- xoffset += slice_offset_x;
- yoffset += slice_offset_y;
-
- dst_pitch = _mesa_image_row_stride(pack, width, format, type);
-
- /* For a window-system renderbuffer, the buffer is actually flipped
- * vertically, so we need to handle that. Since the detiling function
- * can only really work in the forwards direction, we have to be a
- * little creative. First, we compute the Y-offset of the first row of
- * the renderbuffer (in renderbuffer coordinates). We then match that
- * with the last row of the client's data. Finally, we give
- * tiled_to_linear a negative pitch so that it walks through the
- * client's data backwards as it walks through the renderbuffer forwards.
- */
- if (ctx->ReadBuffer->FlipY) {
- yoffset = rb->Height - yoffset - height;
- pixels += (ptrdiff_t) (height - 1) * dst_pitch;
- dst_pitch = -dst_pitch;
- }
-
- /* We postponed printing this message until having committed to executing
- * the function.
- */
- DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
- "mesa_format=0x%x tiling=%d "
- "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
- __func__, xoffset, yoffset, width, height,
- format, type, rb->Format, irb->mt->surf.tiling,
- pack->Alignment, pack->RowLength, pack->SkipPixels,
- pack->SkipRows);
-
- isl_memcpy_tiled_to_linear(
- xoffset * cpp, (xoffset + width) * cpp,
- yoffset, yoffset + height,
- pixels,
- map + irb->mt->offset,
- dst_pitch, irb->mt->surf.row_pitch_B,
- devinfo->has_bit6_swizzle,
- irb->mt->surf.tiling,
- copy_type
- );
-
- brw_bo_unmap(bo);
- return true;
-}
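
A minimal sketch of the negative-pitch trick used for FlipY above, with made-up row and buffer sizes (none of this comes from the deleted file): a forward walk over the source fills the client buffer bottom-up once the destination pointer starts at the last row and the pitch is negated.

   #include <stdio.h>
   #include <string.h>

   int main(void)
   {
      enum { HEIGHT = 4, PITCH = 8 };
      char client[HEIGHT * PITCH];

      /* Point at the last client row and step backwards. */
      char *dst = client + (HEIGHT - 1) * PITCH;
      int dst_pitch = -PITCH;

      for (int row = 0; row < HEIGHT; row++) {
         /* A forward walk over source rows 0, 1, 2, 3... */
         memset(dst + row * dst_pitch, 'a' + row, PITCH);
      }

      /* ...lands in the client buffer in flipped order: d, c, b, a. */
      for (int row = 0; row < HEIGHT; row++)
         printf("client row %d: %c\n", row, client[row * PITCH]);
      return 0;
   }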
-
-static bool
-brw_readpixels_blorp(struct gl_context *ctx,
- unsigned x, unsigned y,
- unsigned w, unsigned h,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
- if (!rb)
- return false;
-
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
- /* _mesa_get_readpixels_transfer_ops() includes the cases of read
- * color clamping along with the ctx->_ImageTransferState.
- */
- if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
- type, GL_FALSE))
- return false;
-
- GLenum dst_base_format = _mesa_unpack_format_to_base_format(format);
- if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
- dst_base_format))
- return false;
-
- unsigned swizzle;
- if (irb->Base.Base._BaseFormat == GL_RGB) {
- swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
- } else {
- swizzle = SWIZZLE_XYZW;
- }
-
- return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle,
- irb->mt_level, x, y, irb->mt_layer,
- w, h, 1, GL_TEXTURE_2D, format, type,
- ctx->ReadBuffer->FlipY, pixels, packing);
-}
-
-void
-brw_readpixels(struct gl_context *ctx,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
-{
- bool ok;
-
- struct brw_context *brw = brw_context(ctx);
- bool dirty;
-
- DBG("%s\n", __func__);
-
- /* Reading pixels won't dirty the front buffer, so reset the dirty
- * flag after calling brw_prepare_render().
- */
- dirty = brw->front_buffer_dirty;
- brw_prepare_render(brw);
- brw->front_buffer_dirty = dirty;
-
- if (pack->BufferObj) {
- if (brw_readpixels_blorp(ctx, x, y, width, height,
- format, type, pixels, pack))
- return;
-
- perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
- }
-
- ok = brw_readpixels_tiled_memcpy(ctx, x, y, width, height,
- format, type, pixels, pack);
- if (ok)
- return;
-
- /* Update Mesa state before calling _mesa_readpixels().
- * XXX this may not be needed since ReadPixels no longer uses the
- * span code.
- */
-
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
-
- /* There's a brw_prepare_render() call in intelSpanRenderStart(). */
- brw->front_buffer_dirty = dirty;
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Jordan Justen <jordan.l.justen@intel.com>
- *
- */
-
-#include "main/bufferobj.h"
-#include "main/varray.h"
-#include "vbo/vbo.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_draw.h"
-
-#include "brw_batch.h"
-
-
-#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
-#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
-
-/*
- * Notes on primitive restart:
- * The code below is used when the driver does not fully support primitive
- * restart (for example, if it only does restart index of ~0).
- *
- * We map the index buffer, find the restart indexes, unmap
- * the index buffer, then draw the sub-primitives delineated by the restarts.
- *
- * A couple of possible optimizations:
- * 1. Save the list of sub-primitive (start, count) values in a list attached
- * to the index buffer for re-use in subsequent draws. The list would be
- * invalidated when the contents of the buffer changed.
- * 2. If drawing triangle strips or quad strips, create a new index buffer
- * that uses duplicated vertices to render the disjoint strips as one
- * long strip. We'd have to be careful to avoid using too much memory
- * for this.
- *
- * Finally, some apps might perform better if they don't use primitive restart
- * at all rather than this fallback path. Set MESA_EXTENSION_OVERRIDE to
- * "-GL_NV_primitive_restart" to test that.
- */
-
-
-struct sub_primitive
-{
- GLuint start;
- GLuint count;
- GLuint min_index;
- GLuint max_index;
-};
-
-
-/**
- * Scan the elements array to find restart indexes. Return an array
- * of struct sub_primitive indicating how to draw the sub-primitives
- * delineated by the restart index.
- */
-static struct sub_primitive *
-find_sub_primitives(const void *elements, unsigned element_size,
- unsigned start, unsigned end, unsigned restart_index,
- unsigned *num_sub_prims)
-{
- const unsigned max_prims = end - start;
- struct sub_primitive *sub_prims;
- unsigned i, cur_start, cur_count;
- GLuint scan_index;
- unsigned scan_num;
-
- sub_prims =
- malloc(max_prims * sizeof(struct sub_primitive));
-
- if (!sub_prims) {
- *num_sub_prims = 0;
- return NULL;
- }
-
- cur_start = start;
- cur_count = 0;
- scan_num = 0;
-
-#define IB_INDEX_READ(TYPE, INDEX) (((const GL##TYPE *) elements)[INDEX])
-
-#define SCAN_ELEMENTS(TYPE) \
- sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \
- sub_prims[scan_num].max_index = 0; \
- for (i = start; i < end; i++) { \
- scan_index = IB_INDEX_READ(TYPE, i); \
- if (scan_index == restart_index) { \
- if (cur_count > 0) { \
- assert(scan_num < max_prims); \
- sub_prims[scan_num].start = cur_start; \
- sub_prims[scan_num].count = cur_count; \
- scan_num++; \
- sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \
- sub_prims[scan_num].max_index = 0; \
- } \
- cur_start = i + 1; \
- cur_count = 0; \
- } \
- else { \
- UPDATE_MIN2(sub_prims[scan_num].min_index, scan_index); \
- UPDATE_MAX2(sub_prims[scan_num].max_index, scan_index); \
- cur_count++; \
- } \
- } \
- if (cur_count > 0) { \
- assert(scan_num < max_prims); \
- sub_prims[scan_num].start = cur_start; \
- sub_prims[scan_num].count = cur_count; \
- scan_num++; \
- }
-
- switch (element_size) {
- case 1:
- SCAN_ELEMENTS(ubyte);
- break;
- case 2:
- SCAN_ELEMENTS(ushort);
- break;
- case 4:
- SCAN_ELEMENTS(uint);
- break;
- default:
- assert(0 && "bad index_size in find_sub_primitives()");
- }
-
-#undef SCAN_ELEMENTS
-
- *num_sub_prims = scan_num;
-
- return sub_prims;
-}
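
A minimal standalone sketch of what find_sub_primitives() computes, assuming a small GL_UNSIGNED_SHORT index buffer and restart index 0xffff (the values are made up):

   #include <stdio.h>

   int main(void)
   {
      const unsigned short indices[] = { 0, 1, 2, 0xffff, 3, 4, 5, 6 };
      const unsigned restart = 0xffff;
      const unsigned count = sizeof(indices) / sizeof(indices[0]);
      unsigned start = 0, run = 0;

      for (unsigned i = 0; i < count; i++) {
         if (indices[i] == restart) {
            if (run)
               printf("sub-primitive: start=%u count=%u\n", start, run);
            start = i + 1;
            run = 0;
         } else {
            run++;
         }
      }
      if (run)
         printf("sub-primitive: start=%u count=%u\n", start, run);

      /* Prints start=0 count=3 and start=4 count=4; the software fallback
       * then draws each range as its own primitive. */
      return 0;
   }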
-
-
-/**
- * Handle primitive restart in software.
- *
- * This function breaks up calls into the driver so primitive restart
- * support is not required in the driver.
- */
-static void
-vbo_sw_primitive_restart_common_start(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint num_instances,
- GLuint base_instance,
- struct gl_buffer_object *indirect,
- GLsizeiptr indirect_offset,
- bool primitive_restart,
- unsigned restart_index)
-{
- GLuint prim_num;
- struct _mesa_prim new_prim;
- struct _mesa_index_buffer new_ib;
- struct sub_primitive *sub_prims;
- struct sub_primitive *sub_prim;
- GLuint num_sub_prims;
- GLuint sub_prim_num;
- GLuint end_index;
- GLuint sub_end_index;
- struct _mesa_prim temp_prim;
- GLboolean map_ib = ib->obj && !ib->obj->Mappings[MAP_INTERNAL].Pointer;
- const void *ptr;
-
- /* If there is an indirect buffer, map it and extract the draw params */
- if (indirect) {
- const uint32_t *indirect_params;
- if (!ctx->Driver.MapBufferRange(ctx, 0, indirect->Size, GL_MAP_READ_BIT,
- indirect, MAP_INTERNAL)) {
-
- /* something went wrong with mapping, give up */
- _mesa_error(ctx, GL_OUT_OF_MEMORY,
- "failed to map indirect buffer for sw primitive restart");
- return;
- }
-
- assert(nr_prims == 1);
- new_prim = prims[0];
- indirect_params = (const uint32_t *)
- ADD_POINTERS(indirect->Mappings[MAP_INTERNAL].Pointer,
- indirect_offset);
-
- new_prim.count = indirect_params[0];
- new_prim.start = indirect_params[2];
- new_prim.basevertex = indirect_params[3];
-
- num_instances = indirect_params[1];
- base_instance = indirect_params[4];
-
- new_ib = *ib;
- new_ib.count = new_prim.count;
-
- prims = &new_prim;
- ib = &new_ib;
-
- ctx->Driver.UnmapBuffer(ctx, indirect, MAP_INTERNAL);
- }
-
- /* Find the sub-primitives. These are regions in the index buffer which
- * are split based on the primitive restart index value.
- */
- if (map_ib) {
- ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
- ib->obj, MAP_INTERNAL);
- }
-
- if (ib->obj)
- ptr = ADD_POINTERS(ib->obj->Mappings[MAP_INTERNAL].Pointer, ib->ptr);
- else
- ptr = ib->ptr;
-
- sub_prims = find_sub_primitives(ptr, 1 << ib->index_size_shift,
- prims[0].start, prims[0].start + ib->count,
- restart_index, &num_sub_prims);
-
- if (map_ib) {
- ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
- }
-
- /* Loop over the primitives, and use the located sub-primitives to draw
- * each primitive with a break to implement each primitive restart.
- */
- for (prim_num = 0; prim_num < nr_prims; prim_num++) {
- end_index = prims[prim_num].start + prims[prim_num].count;
- memcpy(&temp_prim, &prims[prim_num], sizeof (temp_prim));
- /* Loop over the sub-primitives drawing sub-ranges of the primitive. */
- for (sub_prim_num = 0; sub_prim_num < num_sub_prims; sub_prim_num++) {
- sub_prim = &sub_prims[sub_prim_num];
- sub_end_index = sub_prim->start + sub_prim->count;
- if (prims[prim_num].start <= sub_prim->start) {
- temp_prim.start = MAX2(prims[prim_num].start, sub_prim->start);
- temp_prim.count = MIN2(sub_end_index, end_index) - temp_prim.start;
- if ((temp_prim.start == sub_prim->start) &&
- (temp_prim.count == sub_prim->count)) {
- ctx->Driver.Draw(ctx, &temp_prim, 1, ib, true, false, 0,
- sub_prim->min_index, sub_prim->max_index,
- num_instances, base_instance);
- } else {
- ctx->Driver.Draw(ctx, &temp_prim, 1, ib,
- false, false, 0, -1, -1,
- num_instances, base_instance);
- }
- }
- if (sub_end_index >= end_index) {
- break;
- }
- }
- }
-
- free(sub_prims);
-}
-
-static void
-vbo_sw_primitive_restart(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint num_instances,
- GLuint base_instance,
- struct gl_buffer_object *indirect,
- GLsizeiptr indirect_offset,
- bool primitive_restart,
- unsigned restart_index)
-{
- unsigned i;
- for (i = 1; i < nr_prims; i++) {
- if (prims[i].start != prims[0].start)
- break;
- }
-
- vbo_sw_primitive_restart_common_start(ctx, &prims[0], i, ib,
- num_instances, base_instance,
- indirect, indirect_offset,
- primitive_restart,
- restart_index);
- if (i != nr_prims) {
- vbo_sw_primitive_restart(ctx, &prims[i], nr_prims - i, ib,
- num_instances, base_instance,
- indirect, indirect_offset,
- primitive_restart,
- restart_index);
- }
-}
-
-/**
- * Check if the hardware's cut index support can handle the primitive
- * restart index value (pre-Haswell only).
- */
-static bool
-can_cut_index_handle_restart_index(struct gl_context *ctx,
- const struct _mesa_index_buffer *ib,
- unsigned restart_index)
-{
- /* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on
- * the index buffer type, which corresponds exactly to the hardware.
- */
- if (ctx->Array.PrimitiveRestartFixedIndex)
- return true;
-
- bool cut_index_will_work;
-
- switch (ib->index_size_shift) {
- case 0:
- cut_index_will_work = restart_index == 0xff;
- break;
- case 1:
- cut_index_will_work = restart_index == 0xffff;
- break;
- case 2:
- cut_index_will_work = restart_index == 0xffffffff;
- break;
- default:
- unreachable("not reached");
- }
-
- return cut_index_will_work;
-}
-
-/**
- * Check if the hardware's cut index support can handle the primitive
- * restart case.
- */
-static bool
-can_cut_index_handle_prims(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- unsigned restart_index)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Haswell and later can do it all. */
- if (devinfo->verx10 >= 75)
- return true;
-
- if (!can_cut_index_handle_restart_index(ctx, ib, restart_index)) {
- /* The primitive restart index can't be handled, so take
- * the software path
- */
- return false;
- }
-
- for (unsigned i = 0; i < nr_prims; i++) {
- switch (prim[i].mode) {
- case GL_POINTS:
- case GL_LINES:
- case GL_LINE_STRIP:
- case GL_TRIANGLES:
- case GL_TRIANGLE_STRIP:
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
- /* Cut index supports these primitive types */
- break;
- default:
- /* Cut index does not support these primitive types */
- //case GL_LINE_LOOP:
- //case GL_TRIANGLE_FAN:
- //case GL_QUADS:
- //case GL_QUAD_STRIP:
- //case GL_POLYGON:
- return false;
- }
- }
-
- return true;
-}
-
-/**
- * Check if primitive restart is enabled, and if so, handle it properly.
- *
- * In some cases the support will be handled in software. When available
- * hardware will handle primitive restart.
- */
-GLboolean
-brw_handle_primitive_restart(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint num_instances, GLuint base_instance,
- bool primitive_restart,
- unsigned restart_index)
-{
- struct brw_context *brw = brw_context(ctx);
-
- /* We only need to handle cases where there is an index buffer. */
- if (ib == NULL) {
- return GL_FALSE;
- }
-
- /* If we have set the in_progress flag, then we are in the middle
- * of handling the primitive restart draw.
- */
- if (brw->prim_restart.in_progress) {
- return GL_FALSE;
- }
-
- /* If PrimitiveRestart is not enabled, then we aren't concerned about
- * handling this draw.
- */
- if (!primitive_restart) {
- return GL_FALSE;
- }
-
- /* Signal that we are in the process of handling the
- * primitive restart draw
- */
- brw->prim_restart.in_progress = true;
-
- if (can_cut_index_handle_prims(ctx, prims, nr_prims, ib, restart_index)) {
- /* Cut index should work for primitive restart, so use it
- */
- brw->prim_restart.enable_cut_index = true;
- brw->prim_restart.restart_index = restart_index;
- brw_draw_prims(ctx, prims, nr_prims, ib, false, primitive_restart,
- restart_index, -1, -1,
- num_instances, base_instance);
- brw->prim_restart.enable_cut_index = false;
- } else {
- /* Not all the primitive draw modes are supported by the cut index,
- * so take the software path
- */
- struct gl_buffer_object *indirect_data = brw->draw.draw_indirect_data;
-
- /* Clear this to make the draw direct. */
- brw->draw.draw_indirect_data = NULL;
-
- vbo_sw_primitive_restart(ctx, prims, nr_prims, ib, num_instances,
- base_instance, indirect_data,
- brw->draw.draw_indirect_offset,
- primitive_restart, restart_index);
- }
-
- brw->prim_restart.in_progress = false;
-
- /* The primitive restart draw was completed, so return true. */
- return GL_TRUE;
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-#include <pthread.h>
-#include "main/glspirv.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_to_nir.h"
-#include "program/program.h"
-#include "program/programopt.h"
-#include "tnl/tnl.h"
-#include "util/ralloc.h"
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/program.h"
-#include "compiler/glsl/gl_nir.h"
-#include "compiler/glsl/glsl_to_nir.h"
-
-#include "brw_program.h"
-#include "brw_context.h"
-#include "compiler/brw_nir.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-#include "brw_cs.h"
-#include "brw_gs.h"
-#include "brw_vs.h"
-#include "brw_wm.h"
-#include "brw_state.h"
-
-#include "main/shaderapi.h"
-#include "main/shaderobj.h"
-
-static bool
-brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
-{
- if (is_scalar) {
- nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
- type_size_scalar_bytes);
- return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
- } else {
- nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
- type_size_vec4_bytes);
- return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
- }
-}
-
-static struct gl_program *brw_new_program(struct gl_context *ctx,
- gl_shader_stage stage,
- GLuint id, bool is_arb_asm);
-
-nir_shader *
-brw_create_nir(struct brw_context *brw,
- const struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- gl_shader_stage stage,
- bool is_scalar)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- const nir_shader_compiler_options *options =
- ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- nir_shader *nir;
-
- /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
- if (shader_prog) {
- if (shader_prog->data->spirv) {
- nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
- } else {
- nir = glsl_to_nir(ctx, shader_prog, stage, options);
-
- /* Remap the locations to slots so those requiring two slots will
- * occupy two locations. For instance, if we have in the IR code a
- * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0
- * will use locations/slots 0 and 1, and attr1 will use location/slot 2
- */
- if (nir->info.stage == MESA_SHADER_VERTEX)
- nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
- }
- assert (nir);
-
- nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
- NULL);
- nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir), true, false);
- } else {
- nir = prog_to_nir(prog, options);
- NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
- }
- nir_validate_shader(nir, "before brw_preprocess_nir");
-
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-
- if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) &&
- (options->lower_doubles_options & nir_lower_fp64_full_software)) {
- ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
- }
-
- brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);
-
- if (stage == MESA_SHADER_TESS_CTRL) {
- /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */
- static const gl_state_index16 tokens[STATE_LENGTH] =
- { STATE_TCS_PATCH_VERTICES_IN };
- nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL);
- }
-
- if (stage == MESA_SHADER_TESS_EVAL) {
- /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
- * a uniform if we don't.
- */
- struct gl_linked_shader *tcs =
- shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
- uint32_t static_patch_vertices =
- tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
- static const gl_state_index16 tokens[STATE_LENGTH] =
- { STATE_TES_PATCH_VERTICES_IN };
- nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
- }
-
- if (stage == MESA_SHADER_FRAGMENT) {
- static const struct nir_lower_wpos_ytransform_options wpos_options = {
- .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0},
- .fs_coord_pixel_center_integer = 1,
- .fs_coord_origin_upper_left = 1,
- };
-
- bool progress = false;
- NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
- if (progress) {
- _mesa_add_state_reference(prog->Parameters,
- wpos_options.state_tokens);
- }
- }
-
- return nir;
-}
-
-static void
-shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
-{
- assert(glsl_type_is_vector_or_scalar(type));
-
- uint32_t comp_size = glsl_type_is_boolean(type)
- ? 4 : glsl_get_bit_size(type) / 8;
- unsigned length = glsl_get_vector_elements(type);
- *size = comp_size * length;
- *align = comp_size * (length == 3 ? 4 : length);
-}
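
A minimal sketch of the size/alignment rule implemented by shared_type_info(), with GLSL types reduced to a component size and vector length (example_shared_type_info and the sample types are illustrative only):

   #include <stdio.h>

   static void example_shared_type_info(unsigned comp_size, unsigned length,
                                        unsigned *size, unsigned *align)
   {
      /* Same rule as shared_type_info(): a vec3 gets vec4 alignment. */
      *size = comp_size * length;
      *align = comp_size * (length == 3 ? 4 : length);
   }

   int main(void)
   {
      unsigned size, align;

      example_shared_type_info(4, 3, &size, &align);              /* vec3  */
      printf("vec3:  size=%u align=%u\n", size, align);           /* 12, 16 */

      example_shared_type_info(4, 2, &size, &align);              /* vec2  */
      printf("vec2:  size=%u align=%u\n", size, align);           /* 8, 8   */

      example_shared_type_info(8, 4, &size, &align);              /* dvec4 */
      printf("dvec4: size=%u align=%u\n", size, align);           /* 32, 32 */
      return 0;
   }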
-
-void
-brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- const struct intel_device_info *devinfo)
-{
- NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
- NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
- BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used);
- BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf);
-
- NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo);
-
- if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
- shader_prog->data->spirv) {
- NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
- nir_var_mem_shared, shared_type_info);
- NIR_PASS_V(prog->nir, nir_lower_explicit_io,
- nir_var_mem_shared, nir_address_format_32bit_offset);
- }
-
- NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
- /* Do a round of constant folding to clean up address calculations */
- NIR_PASS_V(prog->nir, nir_opt_constant_folding);
-}
-
-void
-brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
-{
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-
- /* Copy the info we just generated back into the gl_program */
- const char *prog_name = prog->info.name;
- const char *prog_label = prog->info.label;
- prog->info = nir->info;
- prog->info.name = prog_name;
- prog->info.label = prog_label;
-}
-
-static unsigned
-get_new_program_id(struct brw_screen *screen)
-{
- return p_atomic_inc_return(&screen->program_id);
-}
-
-static struct gl_program *
-brw_new_program(struct gl_context *ctx,
- gl_shader_stage stage,
- GLuint id, bool is_arb_asm)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_program *prog = rzalloc(NULL, struct brw_program);
-
- if (prog) {
- prog->id = get_new_program_id(brw->screen);
-
- return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
- }
-
- return NULL;
-}
-
-static void
-brw_delete_program(struct gl_context *ctx, struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
-
- /* Beware! prog's refcount has reached zero, and it's about to be freed.
- *
- * In brw_upload_pipeline_state(), we compare brw->programs[i] to
- * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
- * pointer has changed.
- *
- * We cannot leave brw->programs[i] as a dangling pointer to the dead
- * program. malloc() may allocate the same memory for a new gl_program,
- * causing us to see matching pointers...but totally different programs.
- *
- * We cannot set brw->programs[i] to NULL, either. If we've deleted the
- * active program, Mesa may set ctx->FooProgram._Current to NULL. That
- * would cause us to see matching pointers (NULL == NULL), and fail to
- * detect that a program has changed since our last draw.
- *
- * So, set it to a bogus gl_program pointer that will never match,
- * causing us to properly reevaluate the state on our next draw.
- *
- * Getting this wrong causes heisenbugs which are very hard to catch,
- * as you need a very specific allocation pattern to hit the problem.
- */
- static const struct gl_program deleted_program;
-
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (brw->programs[i] == prog)
- brw->programs[i] = (struct gl_program *) &deleted_program;
- }
-
- _mesa_delete_program( ctx, prog );
-}
-
-
-static GLboolean
-brw_program_string_notify(struct gl_context *ctx,
- GLenum target,
- struct gl_program *prog)
-{
- assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
-
- struct brw_context *brw = brw_context(ctx);
- const struct brw_compiler *compiler = brw->screen->compiler;
-
- switch (target) {
- case GL_FRAGMENT_PROGRAM_ARB: {
- struct brw_program *newFP = brw_program(prog);
- const struct brw_program *curFP =
- brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);
-
- if (newFP == curFP)
- brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
- _mesa_program_fragment_position_to_sysval(&newFP->program);
- newFP->id = get_new_program_id(brw->screen);
-
- prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
-
- brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
-
- brw_shader_gather_info(prog->nir, prog);
-
- brw_fs_precompile(ctx, prog);
- break;
- }
- case GL_VERTEX_PROGRAM_ARB: {
- struct brw_program *newVP = brw_program(prog);
- const struct brw_program *curVP =
- brw_program_const(brw->programs[MESA_SHADER_VERTEX]);
-
- if (newVP == curVP)
- brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
- if (newVP->program.arb.IsPositionInvariant) {
- _mesa_insert_mvp_code(ctx, &newVP->program);
- }
- newVP->id = get_new_program_id(brw->screen);
-
- /* Also tell tnl about it:
- */
- _tnl_program_string(ctx, target, prog);
-
- prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
- compiler->scalar_stage[MESA_SHADER_VERTEX]);
-
- brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
-
- brw_shader_gather_info(prog->nir, prog);
-
- brw_vs_precompile(ctx, prog);
- break;
- }
- default:
- /*
- * driver->ProgramStringNotify is only called for ARB programs, fixed
- * function vertex programs, and ir_to_mesa (which isn't used by the
- * i965 back-end). Therefore, even after geometry shaders are added,
- * this function should only ever be called with a target of
- * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
- */
- unreachable("Unexpected target in brwProgramStringNotify");
- }
-
- return true;
-}
-
-static void
-brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
- assert(devinfo->ver >= 7 && devinfo->ver <= 11);
-
- if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
- GL_ELEMENT_ARRAY_BARRIER_BIT |
- GL_COMMAND_BARRIER_BIT))
- bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-
- if (barriers & GL_UNIFORM_BARRIER_BIT)
- bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE);
-
- if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
- bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-
- if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
- GL_PIXEL_BUFFER_BARRIER_BIT))
- bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
- if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
- bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
- /* Typed surface messages are handled by the render cache on IVB, so we
- * need to flush it too.
- */
- if (devinfo->verx10 == 70)
- bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
-
- brw_emit_pipe_control_flush(brw, bits);
-}
-
-static void
-brw_framebuffer_fetch_barrier(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
- if (devinfo->ver >= 6) {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_CS_STALL);
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
- } else {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH);
- }
- }
-}
-
-void
-brw_get_scratch_bo(struct brw_context *brw,
- struct brw_bo **scratch_bo, int size)
-{
- struct brw_bo *old_bo = *scratch_bo;
-
- if (old_bo && old_bo->size < size) {
- brw_bo_unreference(old_bo);
- old_bo = NULL;
- }
-
- if (!old_bo) {
- *scratch_bo =
- brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
- }
-}
-
-/**
- * Reserve enough scratch space for the given stage to hold \p per_thread_size
- * bytes per scratch ID, up to the stage's maximum number of scratch IDs.
- */
-void
-brw_alloc_stage_scratch(struct brw_context *brw,
- struct brw_stage_state *stage_state,
- unsigned per_thread_size)
-{
- if (stage_state->per_thread_scratch >= per_thread_size)
- return;
-
- stage_state->per_thread_scratch = per_thread_size;
-
- if (stage_state->scratch_bo)
- brw_bo_unreference(stage_state->scratch_bo);
-
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids));
- unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage];
- stage_state->scratch_bo =
- brw_bo_alloc(brw->bufmgr, "shader scratch space",
- per_thread_size * max_ids, BRW_MEMZONE_SCRATCH);
-}
-
-void
-brw_init_frag_prog_functions(struct dd_function_table *functions)
-{
- assert(functions->ProgramStringNotify == _tnl_program_string);
-
- functions->NewProgram = brw_new_program;
- functions->DeleteProgram = brw_delete_program;
- functions->ProgramStringNotify = brw_program_string_notify;
-
- functions->LinkShader = brw_link_shader;
-
- functions->MemoryBarrier = brw_memory_barrier;
- functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
-}
-
-struct shader_times {
- uint64_t time;
- uint64_t written;
- uint64_t reset;
-};
-
-void
-brw_init_shader_time(struct brw_context *brw)
-{
- const int max_entries = 2048;
- brw->shader_time.bo =
- brw_bo_alloc(brw->bufmgr, "shader time",
- max_entries * BRW_SHADER_TIME_STRIDE * 3,
- BRW_MEMZONE_OTHER);
- brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
- brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
- brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
- max_entries);
- brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
- max_entries);
- brw->shader_time.max_entries = max_entries;
-}
-
-static int
-compare_time(const void *a, const void *b)
-{
- uint64_t * const *a_val = a;
- uint64_t * const *b_val = b;
-
- /* We don't just subtract because we're turning the value into an int. */
- if (**a_val < **b_val)
- return -1;
- else if (**a_val == **b_val)
- return 0;
- else
- return 1;
-}
-
-static void
-print_shader_time_line(const char *stage, const char *name,
- int shader_num, uint64_t time, uint64_t total)
-{
- fprintf(stderr, "%-6s%-18s", stage, name);
-
- if (shader_num != 0)
- fprintf(stderr, "%4d: ", shader_num);
- else
- fprintf(stderr, " : ");
-
- fprintf(stderr, "%16lld (%7.2f Gcycles) %4.1f%%\n",
- (long long)time,
- (double)time / 1000000000.0,
- (double)time / total * 100.0);
-}
-
-static void
-brw_report_shader_time(struct brw_context *brw)
-{
- if (!brw->shader_time.bo || !brw->shader_time.num_entries)
- return;
-
- uint64_t scaled[brw->shader_time.num_entries];
- uint64_t *sorted[brw->shader_time.num_entries];
- uint64_t total_by_type[ST_CS + 1];
- memset(total_by_type, 0, sizeof(total_by_type));
- double total = 0;
- for (int i = 0; i < brw->shader_time.num_entries; i++) {
- uint64_t written = 0, reset = 0;
- enum shader_time_shader_type type = brw->shader_time.types[i];
-
- sorted[i] = &scaled[i];
-
- switch (type) {
- case ST_VS:
- case ST_TCS:
- case ST_TES:
- case ST_GS:
- case ST_FS8:
- case ST_FS16:
- case ST_FS32:
- case ST_CS:
- written = brw->shader_time.cumulative[i].written;
- reset = brw->shader_time.cumulative[i].reset;
- break;
-
- default:
- /* I sometimes want to print things that aren't the 3 shader times.
- * Just print the sum in that case.
- */
- written = 1;
- reset = 0;
- break;
- }
-
- uint64_t time = brw->shader_time.cumulative[i].time;
- if (written) {
- scaled[i] = time / written * (written + reset);
- } else {
- scaled[i] = time;
- }
-
- switch (type) {
- case ST_VS:
- case ST_TCS:
- case ST_TES:
- case ST_GS:
- case ST_FS8:
- case ST_FS16:
- case ST_FS32:
- case ST_CS:
- total_by_type[type] += scaled[i];
- break;
- default:
- break;
- }
-
- total += scaled[i];
- }
-
- if (total == 0) {
- fprintf(stderr, "No shader time collected yet\n");
- return;
- }
-
- qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
-
- fprintf(stderr, "\n");
- fprintf(stderr, "type ID cycles spent %% of total\n");
- for (int s = 0; s < brw->shader_time.num_entries; s++) {
- const char *stage;
- /* Recover the original entry index from the sorted pointer. */
- int i = sorted[s] - scaled;
-
- if (scaled[i] == 0)
- continue;
-
- int shader_num = brw->shader_time.ids[i];
- const char *shader_name = brw->shader_time.names[i];
-
- switch (brw->shader_time.types[i]) {
- case ST_VS:
- stage = "vs";
- break;
- case ST_TCS:
- stage = "tcs";
- break;
- case ST_TES:
- stage = "tes";
- break;
- case ST_GS:
- stage = "gs";
- break;
- case ST_FS8:
- stage = "fs8";
- break;
- case ST_FS16:
- stage = "fs16";
- break;
- case ST_FS32:
- stage = "fs32";
- break;
- case ST_CS:
- stage = "cs";
- break;
- default:
- stage = "other";
- break;
- }
-
- print_shader_time_line(stage, shader_name, shader_num,
- scaled[i], total);
- }
-
- fprintf(stderr, "\n");
- print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
- print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
- print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
- print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
- print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
- print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
- print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
- print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
-}
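
A minimal sketch of the written/reset scaling used by brw_report_shader_time() above, with made-up counter values (not real capture data): buckets that were reset before readback are assumed to have accumulated time at the same average rate, so the observed time is scaled up proportionally.

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      /* Hypothetical counters for one shader_time entry. */
      uint64_t time = 1000, written = 8, reset = 2;

      /* Same formula as brw_report_shader_time(). */
      uint64_t scaled = written ? time / written * (written + reset) : time;

      printf("observed=%llu scaled=%llu\n",
             (unsigned long long)time,
             (unsigned long long)scaled);   /* observed=1000 scaled=1250 */
      return 0;
   }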
-
-static void
-brw_collect_shader_time(struct brw_context *brw)
-{
- if (!brw->shader_time.bo)
- return;
-
- /* This probably stalls on the last rendering. We could fix that by
- * delaying reading the reports, but it doesn't look like it's a big
- * overhead compared to the cost of tracking the time in the first place.
- */
- void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
-
- for (int i = 0; i < brw->shader_time.num_entries; i++) {
- uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
-
- brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
- brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
- brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
- }
-
- /* Zero the BO out to clear it out for our next collection.
- */
- memset(bo_map, 0, brw->shader_time.bo->size);
- brw_bo_unmap(brw->shader_time.bo);
-}
-
-void
-brw_collect_and_report_shader_time(struct brw_context *brw)
-{
- brw_collect_shader_time(brw);
-
- if (brw->shader_time.report_time == 0 ||
- get_time() - brw->shader_time.report_time >= 1.0) {
- brw_report_shader_time(brw);
- brw->shader_time.report_time = get_time();
- }
-}
-
-/**
- * Chooses an index in the shader_time buffer and sets up tracking information
- * for our printouts.
- *
- * Note that this holds on to references to the underlying programs, which may
- * change their lifetimes compared to normal operation.
- */
-int
-brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
- enum shader_time_shader_type type, bool is_glsl_sh)
-{
- int shader_time_index = brw->shader_time.num_entries++;
- assert(shader_time_index < brw->shader_time.max_entries);
- brw->shader_time.types[shader_time_index] = type;
-
- const char *name;
- if (prog->Id == 0) {
- name = "ff";
- } else if (is_glsl_sh) {
- name = prog->info.label ?
- ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
- } else {
- name = "prog";
- }
-
- brw->shader_time.names[shader_time_index] = name;
- brw->shader_time.ids[shader_time_index] = prog->Id;
-
- return shader_time_index;
-}
-
-void
-brw_destroy_shader_time(struct brw_context *brw)
-{
- brw_bo_unreference(brw->shader_time.bo);
- brw->shader_time.bo = NULL;
-}
-
-void
-brw_stage_prog_data_free(const void *p)
-{
- struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
-
- ralloc_free(prog_data->param);
- ralloc_free(prog_data->pull_param);
-}
-
-void
-brw_dump_arb_asm(const char *stage, struct gl_program *prog)
-{
- fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
- stage, prog->Id, stage);
- _mesa_print_program(prog);
-}
-
-void
-brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
- struct brw_sampler_prog_key_data *tex,
- const struct gl_program *prog)
-{
- const bool has_shader_channel_select = devinfo->verx10 >= 75;
- unsigned sampler_count = util_last_bit(prog->SamplersUsed);
- for (unsigned i = 0; i < sampler_count; i++) {
- if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
- /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
- tex->swizzles[i] =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
- } else {
- /* Color sampler: assume no swizzling. */
- tex->swizzles[i] = SWIZZLE_XYZW;
- }
- }
-}
-
-/**
- * Sets up the starting offsets for the groups of binding table entries
- * common to all pipeline stages.
- *
- * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
- * unused, and also to make sure that adding small offsets to them will
- * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
- */
-uint32_t
-brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
- const struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- uint32_t next_binding_table_offset)
-{
- int num_textures = util_last_bit(prog->SamplersUsed);
-
- stage_prog_data->binding_table.texture_start = next_binding_table_offset;
- next_binding_table_offset += num_textures;
-
- if (prog->info.num_ubos) {
- assert(prog->info.num_ubos <= BRW_MAX_UBO);
- stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_ubos;
- } else {
- stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
- }
-
- if (prog->info.num_ssbos || prog->info.num_abos) {
- assert(prog->info.num_abos <= BRW_MAX_ABO);
- assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
- stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
- } else {
- stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
- }
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
- next_binding_table_offset++;
- } else {
- stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
- }
-
- if (prog->info.uses_texture_gather) {
- if (devinfo->ver >= 8) {
- stage_prog_data->binding_table.gather_texture_start =
- stage_prog_data->binding_table.texture_start;
- } else {
- stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
- next_binding_table_offset += num_textures;
- }
- } else {
- stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
- }
-
- if (prog->info.num_images) {
- stage_prog_data->binding_table.image_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_images;
- } else {
- stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
- }
-
- /* This may or may not be used depending on how the compile goes. */
- stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
- next_binding_table_offset++;
-
- /* Plane 0 is just the regular texture section */
- stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
-
- stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
- next_binding_table_offset += num_textures;
-
- stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
- next_binding_table_offset += num_textures;
-
- /* Set the binding table size. Some callers may append new entries
- * and increase this accordingly.
- */
- stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
-
- assert(next_binding_table_offset <= BRW_MAX_SURFACES);
- return next_binding_table_offset;
-}
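
To make the offset bookkeeping above concrete, here is a minimal standalone sketch that walks the same assignments for a hypothetical stage with 4 textures and 2 UBOs, no SSBOs/atomics/images, shader_time disabled, and no textureGather. The counts are invented and only illustrate the arithmetic.

#include <assert.h>
#include <stdint.h>

#define UNUSED_SENTINEL 0xd0d0d0d0

int main(void)
{
   /* Hypothetical program: 4 textures, 2 UBOs, nothing else bound. */
   const uint32_t num_textures = 4, num_ubos = 2;
   uint32_t next = 0;

   uint32_t texture_start = next;        next += num_textures;  /* 0..3   */
   uint32_t ubo_start = next;            next += num_ubos;      /* 4..5   */
   uint32_t ssbo_start = UNUSED_SENTINEL;
   uint32_t shader_time_start = UNUSED_SENTINEL;
   uint32_t gather_texture_start = UNUSED_SENTINEL;
   uint32_t image_start = UNUSED_SENTINEL;
   uint32_t pull_constants_start = next; next += 1;             /* 6      */
   uint32_t plane_start0 = texture_start;                       /* 0      */
   uint32_t plane_start1 = next;         next += num_textures;  /* 7..10  */
   uint32_t plane_start2 = next;         next += num_textures;  /* 11..14 */
   uint32_t size_bytes = next * 4;

   assert(texture_start == 0 && ubo_start == 4 && pull_constants_start == 6);
   assert(plane_start0 == 0 && plane_start1 == 7 && plane_start2 == 11);
   assert(next == 15 && size_bytes == 60);
   (void)ssbo_start; (void)shader_time_start;
   (void)gather_texture_start; (void)image_start;
   return 0;
}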
-
-void
-brw_populate_default_key(const struct brw_compiler *compiler,
- union brw_any_prog_key *prog_key,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog)
-{
- switch (prog->info.stage) {
- case MESA_SHADER_VERTEX:
- brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
- break;
- case MESA_SHADER_TESS_CTRL:
- brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
- break;
- case MESA_SHADER_TESS_EVAL:
- brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
- break;
- case MESA_SHADER_GEOMETRY:
- brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
- break;
- case MESA_SHADER_FRAGMENT:
- brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
- break;
- case MESA_SHADER_COMPUTE:
- brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
- break;
- default:
- unreachable("Unsupported stage!");
- }
-}
-
-void
-brw_debug_recompile(struct brw_context *brw,
- gl_shader_stage stage,
- unsigned api_id,
- struct brw_base_prog_key *key)
-{
- const struct brw_compiler *compiler = brw->screen->compiler;
- enum brw_cache_id cache_id = brw_stage_cache_id(stage);
-
- brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n",
- _mesa_shader_stage_to_string(stage), api_id);
-
- const void *old_key =
- brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);
-
- brw_debug_key_recompile(compiler, brw, stage, old_key, key);
-}
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BRW_PROGRAM_H
-#define BRW_PROGRAM_H
-
-#include "compiler/brw_compiler.h"
-#include "nir.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_context;
-struct blob;
-struct blob_reader;
-
-enum brw_param_domain {
- BRW_PARAM_DOMAIN_BUILTIN = 0,
- BRW_PARAM_DOMAIN_PARAMETER,
- BRW_PARAM_DOMAIN_UNIFORM,
- BRW_PARAM_DOMAIN_IMAGE,
-};
-
-#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val))
-#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24)
-#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff)
-
-#define BRW_PARAM_PARAMETER(idx, comp) \
- BRW_PARAM(PARAMETER, ((idx) << 2) | (comp))
-#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2)
-#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3)
-
-#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx))
-#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param)
-
-#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
-#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8)
-#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf)
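
The BRW_PARAM_* macros pack an 8-bit domain tag into the top byte and a 24-bit domain-specific value below it (for PARAMETER, index << 2 | component). A standalone round-trip check, restating the relevant macros so the sketch compiles on its own; the index/component values are invented.

#include <assert.h>
#include <stdint.h>

/* Same packing as the BRW_PARAM_* macros above, restated here. */
#define BRW_PARAM_DOMAIN_PARAMETER 1
#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)

#define BRW_PARAM_PARAMETER(idx, comp) \
   BRW_PARAM(PARAMETER, ((idx) << 2) | (comp))
#define BRW_PARAM_PARAMETER_IDX(param)  (BRW_PARAM_VALUE(param) >> 2)
#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3)

int main(void)
{
   /* Parameter slot 5, component .z (2). */
   uint32_t p = BRW_PARAM_PARAMETER(5, 2);

   assert(p == 0x01000016);
   assert(BRW_PARAM_DOMAIN(p) == BRW_PARAM_DOMAIN_PARAMETER);
   assert(BRW_PARAM_PARAMETER_IDX(p) == 5);
   assert(BRW_PARAM_PARAMETER_COMP(p) == 2);
   return 0;
}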
-
-struct nir_shader *brw_create_nir(struct brw_context *brw,
- const struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- gl_shader_stage stage,
- bool is_scalar);
-
-void brw_nir_lower_resources(nir_shader *nir,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- const struct intel_device_info *devinfo);
-
-void brw_shader_gather_info(nir_shader *nir, struct gl_program *prog);
-
-void brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
- struct brw_sampler_prog_key_data *tex,
- const struct gl_program *prog);
-
-void brw_populate_base_prog_key(struct gl_context *ctx,
- const struct brw_program *prog,
- struct brw_base_prog_key *key);
-void brw_populate_default_base_prog_key(const struct intel_device_info *devinfo,
- const struct brw_program *prog,
- struct brw_base_prog_key *key);
-void brw_debug_recompile(struct brw_context *brw, gl_shader_stage stage,
- unsigned api_id, struct brw_base_prog_key *key);
-
-uint32_t
-brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
- const struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- uint32_t next_binding_table_offset);
-
-void
-brw_populate_default_key(const struct brw_compiler *compiler,
- union brw_any_prog_key *prog_key,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog);
-
-void
-brw_stage_prog_data_free(const void *prog_data);
-
-void
-brw_dump_arb_asm(const char *stage, struct gl_program *prog);
-
-bool brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog);
-bool brw_tcs_precompile(struct gl_context *ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog);
-bool brw_tes_precompile(struct gl_context *ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog);
-bool brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog);
-bool brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog);
-bool brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog);
-
-GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
-
-void brw_upload_tcs_prog(struct brw_context *brw);
-void brw_tcs_populate_key(struct brw_context *brw,
- struct brw_tcs_prog_key *key);
-void brw_tcs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_tcs_prog_key *key,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog);
-void brw_upload_tes_prog(struct brw_context *brw);
-void brw_tes_populate_key(struct brw_context *brw,
- struct brw_tes_prog_key *key);
-void brw_tes_populate_default_key(const struct brw_compiler *compiler,
- struct brw_tes_prog_key *key,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog);
-
-void brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
- const void *program,
- struct brw_stage_prog_data *prog_data);
-bool brw_read_blob_program_data(struct blob_reader *binary,
- struct gl_program *prog, gl_shader_stage stage,
- const uint8_t **program,
- struct brw_stage_prog_data *prog_data);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <stdint.h>
-
-#include "compiler/nir/nir_serialize.h"
-#include "util/build_id.h"
-#include "util/mesa-sha1.h"
-
-#include "brw_context.h"
-#include "brw_program.h"
-#include "brw_state.h"
-
-static uint8_t driver_sha1[20];
-
-void
-brw_program_binary_init(unsigned device_id)
-{
- const struct build_id_note *note =
- build_id_find_nhdr_for_addr(brw_program_binary_init);
- assert(note);
-
- /**
- * With Mesa's megadrivers, taking the sha1 of i965_dri.so may not be
- * unique. Therefore, we make a sha1 of the "i965" string and the sha1
- * build id from i965_dri.so.
- */
- struct mesa_sha1 ctx;
- _mesa_sha1_init(&ctx);
- char renderer[10];
- assert(device_id < 0x10000);
- int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
- assert(len == sizeof(renderer) - 1);
- _mesa_sha1_update(&ctx, renderer, len);
- _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note));
- _mesa_sha1_final(&ctx, driver_sha1);
-}
-
-void
-brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1)
-{
- memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20);
-}
-
-enum driver_cache_blob_part {
- END_PART,
- INTEL_PART,
- NIR_PART,
-};
-
-static bool
-blob_parts_valid(void *blob, uint32_t size)
-{
- struct blob_reader reader;
- blob_reader_init(&reader, blob, size);
-
- do {
- uint32_t part_type = blob_read_uint32(&reader);
- if (reader.overrun)
- return false;
- if (part_type == END_PART)
- return reader.current == reader.end;
- switch ((enum driver_cache_blob_part)part_type) {
- case INTEL_PART:
- case NIR_PART:
- /* Read the uint32_t part-size and skip over it */
- blob_skip_bytes(&reader, blob_read_uint32(&reader));
- if (reader.overrun)
- return false;
- break;
- default:
- return false;
- }
- } while (true);
-}
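
As blob_parts_valid() implies, the driver cache blob is a flat stream of [uint32 part type][uint32 payload size][payload] records terminated by a bare END_PART tag. A standalone sketch that builds such a stream with plain memcpy (not the real blob helpers) and walks it back the same way; the payload bytes are invented.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

enum part { END_PART, INTEL_PART, NIR_PART };

static void put_u32(uint8_t *buf, size_t *off, uint32_t v)
{
   memcpy(buf + *off, &v, 4);
   *off += 4;
}

static uint32_t get_u32(const uint8_t *buf, size_t *off)
{
   uint32_t v;
   memcpy(&v, buf + *off, 4);
   *off += 4;
   return v;
}

int main(void)
{
   uint8_t blob[64];
   size_t off = 0;
   const uint8_t nir_payload[5] = { 1, 2, 3, 4, 5 };

   /* [NIR_PART][size][payload] ... [END_PART] */
   put_u32(blob, &off, NIR_PART);
   put_u32(blob, &off, sizeof(nir_payload));
   memcpy(blob + off, nir_payload, sizeof(nir_payload));
   off += sizeof(nir_payload);
   put_u32(blob, &off, END_PART);
   const size_t size = off;

   /* Walk it back the same way blob_parts_valid() does. */
   size_t cur = 0;
   bool valid = false;
   while (cur + 4 <= size) {
      uint32_t type = get_u32(blob, &cur);
      if (type == END_PART) {
         valid = (cur == size);   /* END_PART must be the last record */
         break;
      }
      uint32_t part_size = get_u32(blob, &cur);
      cur += part_size;           /* skip the payload */
   }
   assert(valid);
   return 0;
}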
-
-static bool
-blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part)
-{
- struct blob_reader reader;
- blob_reader_init(&reader, blob, size);
-
- assert(blob_parts_valid(blob, size));
- do {
- uint32_t part_type = blob_read_uint32(&reader);
- if (part_type == END_PART)
- return false;
- if (part_type == part)
- return true;
- blob_skip_bytes(&reader, blob_read_uint32(&reader));
- } while (true);
-}
-
-static bool
-driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program)
-{
- if (!blob) {
- return false;
- } else if (!blob_parts_valid(blob, size)) {
- unreachable("Driver blob format is bad!");
- return false;
- } else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) {
- return true;
- } else {
- return false;
- }
-}
-
-static void
-serialize_nir_part(struct blob *writer, struct gl_program *prog)
-{
- blob_write_uint32(writer, NIR_PART);
- intptr_t size_offset = blob_reserve_uint32(writer);
- size_t nir_start = writer->size;
- nir_serialize(writer, prog->nir, false);
- blob_overwrite_uint32(writer, size_offset, writer->size - nir_start);
-}
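
serialize_nir_part() doesn't know the serialized NIR size up front, so it reserves a uint32 slot, writes the payload, and backpatches the slot with blob_overwrite_uint32(). A standalone sketch of that reserve-then-backpatch pattern over a plain byte buffer; the payload string is invented.

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
   uint8_t buf[64];
   size_t off = 0;

   /* Reserve 4 bytes for the size; we don't know it yet. */
   size_t size_offset = off;
   off += 4;

   /* Write a payload of (pretend) unknown length. */
   size_t payload_start = off;
   const char payload[] = "serialized-nir";
   memcpy(buf + off, payload, sizeof(payload));
   off += sizeof(payload);

   /* Backpatch the reserved slot with the payload size, like
    * blob_overwrite_uint32() does in serialize_nir_part(). */
   uint32_t payload_size = (uint32_t)(off - payload_start);
   memcpy(buf + size_offset, &payload_size, 4);

   uint32_t readback;
   memcpy(&readback, buf + size_offset, 4);
   assert(readback == sizeof(payload));
   return 0;
}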
-
-void
-brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog)
-{
- if (driver_blob_is_ready(prog->driver_cache_blob,
- prog->driver_cache_blob_size, false))
- return;
-
- if (prog->driver_cache_blob)
- ralloc_free(prog->driver_cache_blob);
-
- struct blob writer;
- blob_init(&writer);
- serialize_nir_part(&writer, prog);
- blob_write_uint32(&writer, END_PART);
- prog->driver_cache_blob = ralloc_size(NULL, writer.size);
- memcpy(prog->driver_cache_blob, writer.data, writer.size);
- prog->driver_cache_blob_size = writer.size;
- blob_finish(&writer);
-}
-
-static bool
-deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx,
- struct gl_program *prog, gl_shader_stage stage)
-{
- struct brw_context *brw = brw_context(ctx);
-
- union brw_any_prog_key prog_key;
- blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage));
- prog_key.base.program_string_id = brw_program(prog)->id;
-
- enum brw_cache_id cache_id = brw_stage_cache_id(stage);
-
- const uint8_t *program;
- struct brw_stage_prog_data *prog_data =
- ralloc_size(NULL, sizeof(union brw_any_prog_data));
-
- if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) {
- ralloc_free(prog_data);
- return false;
- }
-
- uint32_t offset;
- void *out_prog_data;
- brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
- program, prog_data->program_size, prog_data,
- brw_prog_data_size(stage), &offset, &out_prog_data);
-
- ralloc_free(prog_data);
-
- return true;
-}
-
-void
-brw_program_deserialize_driver_blob(struct gl_context *ctx,
- struct gl_program *prog,
- gl_shader_stage stage)
-{
- if (!prog->driver_cache_blob)
- return;
-
- struct blob_reader reader;
- blob_reader_init(&reader, prog->driver_cache_blob,
- prog->driver_cache_blob_size);
-
- do {
- uint32_t part_type = blob_read_uint32(&reader);
- if ((enum driver_cache_blob_part)part_type == END_PART)
- break;
- switch ((enum driver_cache_blob_part)part_type) {
- case INTEL_PART: {
- ASSERTED uint32_t gen_size = blob_read_uint32(&reader);
- assert(!reader.overrun &&
- (uintptr_t)(reader.end - reader.current) > gen_size);
- deserialize_intel_program(&reader, ctx, prog, stage);
- break;
- }
- case NIR_PART: {
- ASSERTED uint32_t nir_size = blob_read_uint32(&reader);
- assert(!reader.overrun &&
- (uintptr_t)(reader.end - reader.current) > nir_size);
- const struct nir_shader_compiler_options *options =
- ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- prog->nir = nir_deserialize(NULL, options, &reader);
- break;
- }
- default:
- unreachable("Unsupported blob part type!");
- break;
- }
- } while (true);
-
- ralloc_free(prog->driver_cache_blob);
- prog->driver_cache_blob = NULL;
- prog->driver_cache_blob_size = 0;
-}
-
-/* This is just a wrapper around brw_program_deserialize_driver_blob() as
- * i965 doesn't need gl_shader_program like other drivers do.
- */
-void
-brw_deserialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *shProg,
- struct gl_program *prog)
-{
- brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
-}
-
-static void
-serialize_intel_part(struct blob *writer, struct gl_context *ctx,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
-
- union brw_any_prog_key key;
- brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog);
-
- const gl_shader_stage stage = prog->info.stage;
- uint32_t offset = 0;
- void *prog_data = NULL;
- if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key,
- brw_prog_key_size(stage), &offset, &prog_data,
- false)) {
- const void *program_map = brw->cache.map + offset;
- /* TODO: Improve perf for non-LLC. It would be best to save it at
- * program generation time when the program is in normal memory
- * accessible with cache to the CPU. Another easier change would be to
- * use _mesa_streaming_load_memcpy to read from the program mapped
- * memory.
- */
- blob_write_uint32(writer, INTEL_PART);
- intptr_t size_offset = blob_reserve_uint32(writer);
- size_t gen_start = writer->size;
- blob_write_bytes(writer, &key, brw_prog_key_size(stage));
- brw_write_blob_program_data(writer, stage, program_map, prog_data);
- blob_overwrite_uint32(writer, size_offset, writer->size - gen_start);
- }
-}
-
-void
-brw_serialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog)
-{
- if (driver_blob_is_ready(prog->driver_cache_blob,
- prog->driver_cache_blob_size, true))
- return;
-
- if (prog->driver_cache_blob) {
- if (!prog->nir) {
- /* If we loaded from the disk shader cache, then the nir might not
- * have been deserialized yet.
- */
- brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
- }
- ralloc_free(prog->driver_cache_blob);
- }
-
- struct blob writer;
- blob_init(&writer);
- serialize_nir_part(&writer, prog);
- serialize_intel_part(&writer, ctx, sh_prog, prog);
- blob_write_uint32(&writer, END_PART);
- prog->driver_cache_blob = ralloc_size(NULL, writer.size);
- memcpy(prog->driver_cache_blob, writer.data, writer.size);
- prog->driver_cache_blob_size = writer.size;
- blob_finish(&writer);
-}
-
-void
-brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
- const void *program,
- struct brw_stage_prog_data *prog_data)
-{
- /* Write prog_data to blob. */
- blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));
-
- /* Write program to blob. */
- blob_write_bytes(binary, program, prog_data->program_size);
-
- /* Write push params */
- blob_write_bytes(binary, prog_data->param,
- sizeof(uint32_t) * prog_data->nr_params);
-
- /* Write pull params */
- blob_write_bytes(binary, prog_data->pull_param,
- sizeof(uint32_t) * prog_data->nr_pull_params);
-}
-
-bool
-brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
- gl_shader_stage stage, const uint8_t **program,
- struct brw_stage_prog_data *prog_data)
-{
- /* Read shader prog_data from blob. */
- blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
- if (binary->overrun)
- return false;
-
- /* Read shader program from blob. */
- *program = blob_read_bytes(binary, prog_data->program_size);
-
- /* Read push params */
- prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
- blob_copy_bytes(binary, prog_data->param,
- sizeof(uint32_t) * prog_data->nr_params);
-
- /* Read pull params */
- prog_data->pull_param = rzalloc_array(NULL, uint32_t,
- prog_data->nr_pull_params);
- blob_copy_bytes(binary, prog_data->pull_param,
- sizeof(uint32_t) * prog_data->nr_pull_params);
-
- return !binary->overrun;
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-/** @file brw_program_cache.c
- *
- * This file implements a simple program cache for 965. The consumers can
- * query the hash table of programs using a cache_id and program key, and
- * receive the corresponding program buffer object (plus associated auxiliary
- * data) in return. Objects in the cache may not have relocations
- * (pointers to other BOs) in them.
- *
- * The inner workings are a simple hash table keyed by a 32-bit xxhash
- * (XXH32) of the cache_id and key data.
- *
- * Replacement is not implemented. Instead, when the cache gets too
- * big we throw out all of the cache data and let it get regenerated.
- */
-
-#include "main/streaming-load-memcpy.h"
-#include "x86/common_x86_asm.h"
-#include "brw_batch.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_gs.h"
-#include "brw_cs.h"
-#include "brw_program.h"
-#include "compiler/brw_eu.h"
-#include "util/u_memory.h"
-#define XXH_INLINE_ALL
-#include "util/xxhash.h"
-
-#define FILE_DEBUG_FLAG DEBUG_STATE
-
-struct brw_cache_item {
- /**
- * Effectively part of the key, cache_id identifies what kind of state
- * buffer is involved, and also which dirty flag should be set.
- */
- enum brw_cache_id cache_id;
-
- /** 32-bit hash of the key data */
- GLuint hash;
-
- /** for variable-sized keys */
- GLuint key_size;
- GLuint prog_data_size;
- const struct brw_base_prog_key *key;
-
- uint32_t offset;
- uint32_t size;
-
- struct brw_cache_item *next;
-};
-
-enum brw_cache_id
-brw_stage_cache_id(gl_shader_stage stage)
-{
- static const enum brw_cache_id stage_ids[] = {
- BRW_CACHE_VS_PROG,
- BRW_CACHE_TCS_PROG,
- BRW_CACHE_TES_PROG,
- BRW_CACHE_GS_PROG,
- BRW_CACHE_FS_PROG,
- BRW_CACHE_CS_PROG,
- };
- assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_ids));
- return stage_ids[stage];
-}
-
-static GLuint
-hash_key(struct brw_cache_item *item)
-{
- uint32_t hash = 0;
- hash = XXH32(&item->cache_id, sizeof(item->cache_id), hash);
- hash = XXH32(item->key, item->key_size, hash);
-
- return hash;
-}
-
-static int
-brw_cache_item_equals(const struct brw_cache_item *a,
- const struct brw_cache_item *b)
-{
- return a->cache_id == b->cache_id &&
- a->hash == b->hash &&
- a->key_size == b->key_size &&
- (memcmp(a->key, b->key, a->key_size) == 0);
-}
-
-static struct brw_cache_item *
-search_cache(struct brw_cache *cache, GLuint hash,
- struct brw_cache_item *lookup)
-{
- struct brw_cache_item *c;
-
-#if 0
- int bucketcount = 0;
-
- for (c = cache->items[hash % cache->size]; c; c = c->next)
- bucketcount++;
-
- fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
- cache->size, bucketcount, cache->n_items);
-#endif
-
- for (c = cache->items[hash % cache->size]; c; c = c->next) {
- if (brw_cache_item_equals(lookup, c))
- return c;
- }
-
- return NULL;
-}
-
-
-static void
-rehash(struct brw_cache *cache)
-{
- struct brw_cache_item **items;
- struct brw_cache_item *c, *next;
- GLuint size, i;
-
- size = cache->size * 3;
- items = calloc(size, sizeof(*items));
-
- for (i = 0; i < cache->size; i++)
- for (c = cache->items[i]; c; c = next) {
- next = c->next;
- c->next = items[c->hash % size];
- items[c->hash % size] = c;
- }
-
- free(cache->items);
- cache->items = items;
- cache->size = size;
-}
-
-
-/**
- * Returns the buffer object matching cache_id and key, or NULL.
- */
-bool
-brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
- const void *key, GLuint key_size, uint32_t *inout_offset,
- void *inout_prog_data, bool flag_state)
-{
- struct brw_cache_item *item;
- struct brw_cache_item lookup;
- GLuint hash;
-
- lookup.cache_id = cache_id;
- lookup.key = key;
- lookup.key_size = key_size;
- hash = hash_key(&lookup);
- lookup.hash = hash;
-
- item = search_cache(cache, hash, &lookup);
-
- if (item == NULL)
- return false;
-
- void *prog_data = ((char *) item->key) + item->key_size;
-
- if (item->offset != *inout_offset ||
- prog_data != *((void **) inout_prog_data)) {
- if (likely(flag_state))
- cache->brw->ctx.NewDriverState |= (1 << cache_id);
- *inout_offset = item->offset;
- *((void **) inout_prog_data) = prog_data;
- }
-
- return true;
-}
-
-static void
-brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
-{
- struct brw_context *brw = cache->brw;
- struct brw_bo *new_bo;
-
- perf_debug("Copying to larger program cache: %u kB -> %u kB\n",
- (unsigned) cache->bo->size / 1024, new_size / 1024);
-
- new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size,
- BRW_MEMZONE_SHADER);
- if (can_do_exec_capture(brw->screen))
- new_bo->kflags |= EXEC_OBJECT_CAPTURE;
-
- void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE |
- MAP_ASYNC | MAP_PERSISTENT);
-
- /* Copy any existing data that needs to be saved. */
- if (cache->next_offset != 0) {
-#ifdef USE_SSE41
- if (!cache->bo->cache_coherent && cpu_has_sse4_1)
- _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset);
- else
-#endif
- memcpy(map, cache->map, cache->next_offset);
- }
-
- brw_bo_unmap(cache->bo);
- brw_bo_unreference(cache->bo);
- cache->bo = new_bo;
- cache->map = map;
-
- /* Since we have a new BO in place, we need to signal the units
- * that depend on it (state base address on gfx5+, or unit state before).
- */
- brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
- brw->batch.state_base_address_emitted = false;
-}
-
-/**
- * Attempts to find an item in the cache with identical data.
- */
-static const struct brw_cache_item *
-brw_lookup_prog(const struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data, unsigned data_size)
-{
- unsigned i;
- const struct brw_cache_item *item;
-
- for (i = 0; i < cache->size; i++) {
- for (item = cache->items[i]; item; item = item->next) {
- if (item->cache_id != cache_id || item->size != data_size ||
- memcmp(cache->map + item->offset, data, item->size) != 0)
- continue;
-
- return item;
- }
- }
-
- return NULL;
-}
-
-static uint32_t
-brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
-{
- uint32_t offset;
-
- /* Allocate space in the cache BO for our new program. */
- if (cache->next_offset + size > cache->bo->size) {
- uint32_t new_size = cache->bo->size * 2;
-
- while (cache->next_offset + size > new_size)
- new_size *= 2;
-
- brw_cache_new_bo(cache, new_size);
- }
-
- offset = cache->next_offset;
-
- /* Programs are always 64-byte aligned, so set up the next one now */
- cache->next_offset = ALIGN(offset + size, 64);
-
- return offset;
-}
-
-const void *
-brw_find_previous_compile(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- unsigned program_string_id)
-{
- for (unsigned i = 0; i < cache->size; i++) {
- for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
- if (c->cache_id == cache_id &&
- c->key->program_string_id == program_string_id) {
- return c->key;
- }
- }
- }
-
- return NULL;
-}
-
-void
-brw_upload_cache(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- const void *data,
- GLuint data_size,
- const void *prog_data,
- GLuint prog_data_size,
- uint32_t *out_offset,
- void *out_prog_data)
-{
- struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
- const struct brw_cache_item *matching_data =
- brw_lookup_prog(cache, cache_id, data, data_size);
- GLuint hash;
- void *tmp;
-
- item->cache_id = cache_id;
- item->size = data_size;
- item->key = key;
- item->key_size = key_size;
- item->prog_data_size = prog_data_size;
- hash = hash_key(item);
- item->hash = hash;
-
- /* If we can find a matching prog in the cache already, then reuse the
- * existing stuff without creating new copy into the underlying buffer
- * object. This is notably useful for programs generating shaders at
- * runtime, where multiple shaders may compile to the same thing in our
- * backend.
- */
- if (matching_data) {
- item->offset = matching_data->offset;
- } else {
- item->offset = brw_alloc_item_data(cache, data_size);
-
- /* Copy data to the buffer */
- memcpy(cache->map + item->offset, data, data_size);
- }
-
- /* Set up the memory containing the key and prog_data */
- tmp = malloc(key_size + prog_data_size);
-
- memcpy(tmp, key, key_size);
- memcpy(tmp + key_size, prog_data, prog_data_size);
-
- item->key = tmp;
-
- if (cache->n_items > cache->size * 1.5f)
- rehash(cache);
-
- hash %= cache->size;
- item->next = cache->items[hash];
- cache->items[hash] = item;
- cache->n_items++;
-
- *out_offset = item->offset;
- *(void **)out_prog_data = (void *)((char *)item->key + item->key_size);
- cache->brw->ctx.NewDriverState |= 1 << cache_id;
-}
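
brw_upload_cache() stores the key and prog_data back to back in a single malloc, which is why brw_search_cache() can recover prog_data as (char *)item->key + item->key_size. A standalone sketch of that layout with made-up key and prog_data structs; none of these types exist in the driver.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Invented stand-ins for a prog key and its prog_data. */
struct fake_key { uint32_t program_string_id; uint32_t flags; };
struct fake_prog_data { uint32_t nr_params; uint32_t program_size; };

int main(void)
{
   struct fake_key key = { .program_string_id = 7, .flags = 3 };
   struct fake_prog_data pd = { .nr_params = 12, .program_size = 256 };

   /* One allocation holding the key followed by prog_data, as in
    * brw_upload_cache(). */
   void *tmp = malloc(sizeof(key) + sizeof(pd));
   memcpy(tmp, &key, sizeof(key));
   memcpy((char *)tmp + sizeof(key), &pd, sizeof(pd));

   /* Recover prog_data the way brw_search_cache() does: offset past
    * the key. */
   const struct fake_prog_data *found =
      (const struct fake_prog_data *)((char *)tmp + sizeof(key));
   assert(found->nr_params == 12 && found->program_size == 256);

   free(tmp);
   return 0;
}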
-
-void
-brw_init_caches(struct brw_context *brw)
-{
- struct brw_cache *cache = &brw->cache;
-
- cache->brw = brw;
-
- cache->size = 7;
- cache->n_items = 0;
- cache->items =
- calloc(cache->size, sizeof(struct brw_cache_item *));
-
- cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 16384,
- BRW_MEMZONE_SHADER);
- if (can_do_exec_capture(brw->screen))
- cache->bo->kflags |= EXEC_OBJECT_CAPTURE;
-
- cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE |
- MAP_ASYNC | MAP_PERSISTENT);
-}
-
-static void
-brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
-{
- struct brw_cache_item *c, *next;
- GLuint i;
-
- DBG("%s\n", __func__);
-
- for (i = 0; i < cache->size; i++) {
- for (c = cache->items[i]; c; c = next) {
- next = c->next;
- if (c->cache_id == BRW_CACHE_VS_PROG ||
- c->cache_id == BRW_CACHE_TCS_PROG ||
- c->cache_id == BRW_CACHE_TES_PROG ||
- c->cache_id == BRW_CACHE_GS_PROG ||
- c->cache_id == BRW_CACHE_FS_PROG ||
- c->cache_id == BRW_CACHE_CS_PROG) {
- const void *item_prog_data = ((char *)c->key) + c->key_size;
- brw_stage_prog_data_free(item_prog_data);
- }
- free((void *)c->key);
- free(c);
- }
- cache->items[i] = NULL;
- }
-
- cache->n_items = 0;
-
- /* Start putting programs into the start of the BO again, since
- * we'll never find the old results.
- */
- cache->next_offset = 0;
-
- /* We need to make sure that the programs get regenerated, since
- * any offsets leftover in brw_context will no longer be valid.
- */
- brw->NewGLState = ~0;
- brw->ctx.NewDriverState = ~0ull;
- brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
- brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
- brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
- brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
-
- /* Also, NULL out any stale program pointers. */
- brw->vs.base.prog_data = NULL;
- brw->tcs.base.prog_data = NULL;
- brw->tes.base.prog_data = NULL;
- brw->gs.base.prog_data = NULL;
- brw->wm.base.prog_data = NULL;
- brw->cs.base.prog_data = NULL;
-
- brw_batch_flush(brw);
-}
-
-void
-brw_program_cache_check_size(struct brw_context *brw)
-{
- /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of
- * state cache.
- */
- if (brw->cache.n_items > 2000) {
- perf_debug("Exceeded state cache size limit. Clearing the set "
- "of compiled programs, which will trigger recompiles\n");
- brw_clear_cache(brw, &brw->cache);
- brw_cache_new_bo(&brw->cache, brw->cache.bo->size);
- }
-}
-
-
-static void
-brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
-{
-
- DBG("%s\n", __func__);
-
- /* This can be NULL if context creation failed early on */
- if (cache->bo) {
- brw_bo_unmap(cache->bo);
- brw_bo_unreference(cache->bo);
- cache->bo = NULL;
- cache->map = NULL;
- }
- brw_clear_cache(brw, cache);
- free(cache->items);
- cache->items = NULL;
- cache->size = 0;
-}
-
-
-void
-brw_destroy_caches(struct brw_context *brw)
-{
- brw_destroy_cache(brw, &brw->cache);
-}
-
-static const char *
-cache_name(enum brw_cache_id cache_id)
-{
- switch (cache_id) {
- case BRW_CACHE_VS_PROG:
- return "VS kernel";
- case BRW_CACHE_TCS_PROG:
- return "TCS kernel";
- case BRW_CACHE_TES_PROG:
- return "TES kernel";
- case BRW_CACHE_FF_GS_PROG:
- return "Fixed-function GS kernel";
- case BRW_CACHE_GS_PROG:
- return "GS kernel";
- case BRW_CACHE_CLIP_PROG:
- return "CLIP kernel";
- case BRW_CACHE_SF_PROG:
- return "SF kernel";
- case BRW_CACHE_FS_PROG:
- return "FS kernel";
- case BRW_CACHE_CS_PROG:
- return "CS kernel";
- default:
- return "unknown";
- }
-}
-
-void
-brw_print_program_cache(struct brw_context *brw)
-{
- const struct brw_cache *cache = &brw->cache;
- struct brw_cache_item *item;
-
- for (unsigned i = 0; i < cache->size; i++) {
- for (item = cache->items[i]; item; item = item->next) {
- fprintf(stderr, "%s:\n", cache_name(item->cache_id));
- brw_disassemble_with_labels(&brw->screen->devinfo, cache->map,
- item->offset, item->size, stderr);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-/** @file brw_queryobj.c
- *
- * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
- * GL_EXT_transform_feedback, and friends).
- *
- * The hardware provides a PIPE_CONTROL command that can report the number of
- * fragments that passed the depth test, or the hardware timer. They are
- * appropriately synced with the stage of the pipeline for our extensions'
- * needs.
- */
-#include "main/queryobj.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/* As best we know currently, the Gen HW timestamps are 36bits across
- * all platforms, which we need to account for when calculating a
- * delta to measure elapsed time.
- *
- * The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes
- * only have 32bits due to a kernel bug and so in that case we make sure to
- * treat all raw timestamps as 32bits so they overflow consistently and remain
- * comparable. (Note: the timestamps being passed here are not from the kernel
- * so we don't need to be taking the upper 32bits; in this buggy kernel case we
- * are just clipping to 32bits here for consistency.)
- */
-uint64_t
-brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, uint64_t time1)
-{
- if (brw->screen->hw_has_timestamp == 2) {
- /* Kernel clips timestamps to 32bits in this case, so we also clip
- * PIPE_CONTROL timestamps for consistency.
- */
- return (uint32_t)time1 - (uint32_t)time0;
- } else {
- if (time0 > time1) {
- return (1ULL << 36) + time1 - time0;
- } else {
- return time1 - time0;
- }
- }
-}
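
A quick standalone check of the wrap handling above, using invented timestamp values: when the 36-bit timer wraps between the two reads, adding 1 << 36 before subtracting still yields the true elapsed delta.

#include <assert.h>
#include <stdint.h>

/* Mirrors brw_raw_timestamp_delta()'s 36-bit wrap handling; the timestamp
 * values below are made up. */
static uint64_t
delta36(uint64_t time0, uint64_t time1)
{
   if (time0 > time1)
      return (1ULL << 36) + time1 - time0;
   else
      return time1 - time0;
}

int main(void)
{
   /* The 36-bit timer wrapped between the two reads. */
   assert(delta36((1ULL << 36) - 100, 50) == 150);
   /* No wrap. */
   assert(delta36(1000, 1500) == 500);
   return 0;
}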
-
-/**
- * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
- */
-void
-brw_write_timestamp(struct brw_context *brw, struct brw_bo *query_bo, int idx)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver == 6) {
- /* Emit Sandybridge workaround flush: */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
- }
-
- uint32_t flags = PIPE_CONTROL_WRITE_TIMESTAMP;
-
- if (devinfo->ver == 9 && devinfo->gt == 4)
- flags |= PIPE_CONTROL_CS_STALL;
-
- brw_emit_pipe_control_write(brw, flags,
- query_bo, idx * sizeof(uint64_t), 0);
-}
-
-/**
- * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
- */
-void
-brw_write_depth_count(struct brw_context *brw, struct brw_bo *query_bo, int idx)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t flags = PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_DEPTH_STALL;
-
- if (devinfo->ver == 9 && devinfo->gt == 4)
- flags |= PIPE_CONTROL_CS_STALL;
-
- if (devinfo->ver >= 10) {
- /* "Driver must program PIPE_CONTROL with only Depth Stall Enable bit set
- * prior to programming a PIPE_CONTROL with Write PS Depth Count Post sync
- * operation."
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
- }
-
- brw_emit_pipe_control_write(brw, flags,
- query_bo, idx * sizeof(uint64_t), 0);
-}
-
-/**
- * Wait on the query object's BO and calculate the final result.
- */
-static void
-brw_queryobj_get_results(struct gl_context *ctx,
- struct brw_query_object *query)
-{
- struct brw_context *brw = brw_context(ctx);
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- int i;
- uint64_t *results;
-
- assert(devinfo->ver < 6);
-
- if (query->bo == NULL)
- return;
-
- /* If the application has requested the query result, but this batch is
- * still contributing to it, flush it now so the results will be present
- * when mapped.
- */
- if (brw_batch_references(&brw->batch, query->bo))
- brw_batch_flush(brw);
-
- if (unlikely(brw->perf_debug)) {
- if (brw_bo_busy(query->bo)) {
- perf_debug("Stalling on the GPU waiting for a query object.\n");
- }
- }
-
- results = brw_bo_map(brw, query->bo, MAP_READ);
- switch (query->Base.Target) {
- case GL_TIME_ELAPSED_EXT:
- /* The query BO contains the starting and ending timestamps.
- * Subtract the two and convert to nanoseconds.
- */
- query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);
- query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result);
- break;
-
- case GL_TIMESTAMP:
- /* The query BO contains a single timestamp value in results[0]. */
- query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]);
-
- /* Ensure the scaled timestamp overflows according to
- * GL_QUERY_COUNTER_BITS
- */
- query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
- break;
-
- case GL_SAMPLES_PASSED_ARB:
- /* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT
- * value at the start and end of the batchbuffer. Subtract them to
- * get the number of fragments which passed the depth test in each
- * individual batch, and add those differences up to get the number
- * of fragments for the entire query.
- *
- * Note that query->Base.Result may already be non-zero. We may have
- * run out of space in the query's BO and allocated a new one. If so,
- * this function was already called to accumulate the results so far.
- */
- for (i = 0; i < query->last_index; i++) {
- query->Base.Result += results[i * 2 + 1] - results[i * 2];
- }
- break;
-
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- /* If the starting and ending PS_DEPTH_COUNT from any of the batches
- * differ, then some fragments passed the depth test.
- */
- for (i = 0; i < query->last_index; i++) {
- if (results[i * 2 + 1] != results[i * 2]) {
- query->Base.Result = GL_TRUE;
- break;
- }
- }
- break;
-
- default:
- unreachable("Unrecognized query target in brw_queryobj_get_results()");
- }
- brw_bo_unmap(query->bo);
-
- /* Now that we've processed the data stored in the query's buffer object,
- * we can release it.
- */
- brw_bo_unreference(query->bo);
- query->bo = NULL;
-}
-
-/**
- * The NewQueryObject() driver hook.
- *
- * Allocates and initializes a new query object.
- */
-static struct gl_query_object *
-brw_new_query_object(struct gl_context *ctx, GLuint id)
-{
- struct brw_query_object *query;
-
- query = calloc(1, sizeof(struct brw_query_object));
-
- query->Base.Id = id;
- query->Base.Result = 0;
- query->Base.Active = false;
- query->Base.Ready = true;
-
- return &query->Base;
-}
-
-/**
- * The DeleteQuery() driver hook.
- */
-static void
-brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_query_object *query = (struct brw_query_object *)q;
-
- brw_bo_unreference(query->bo);
- _mesa_delete_query(ctx, q);
-}
-
-/**
- * Gfx4-5 driver hook for glBeginQuery().
- *
- * Initializes driver structures and emits any GPU commands required to begin
- * recording data for the query.
- */
-static void
-brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver < 6);
-
- switch (query->Base.Target) {
- case GL_TIME_ELAPSED_EXT:
- /* For timestamp queries, we record the starting time right away so that
- * we measure the full time between BeginQuery and EndQuery. There's
- * some debate about whether this is the right thing to do. Our decision
- * is based on the following text from the ARB_timer_query extension:
- *
- * "(5) Should the extension measure total time elapsed between the full
- * completion of the BeginQuery and EndQuery commands, or just time
- * spent in the graphics library?
- *
- * RESOLVED: This extension will measure the total time elapsed
- * between the full completion of these commands. Future extensions
- * may implement a query to determine time elapsed at different stages
- * of the graphics pipeline."
- *
- * We write a starting timestamp now (at index 0). At EndQuery() time,
- * we'll write a second timestamp (at index 1), and subtract the two to
- * obtain the time elapsed. Notably, this includes time elapsed while
- * the system was doing other work, such as running other applications.
- */
- brw_bo_unreference(query->bo);
- query->bo =
- brw_bo_alloc(brw->bufmgr, "timer query", 4096, BRW_MEMZONE_OTHER);
- brw_write_timestamp(brw, query->bo, 0);
- break;
-
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- case GL_SAMPLES_PASSED_ARB:
- /* For occlusion queries, we delay taking an initial sample until the
- * first drawing occurs in this batch. See the reasoning in the comments
- * for brw_emit_query_begin() below.
- *
- * Since we're starting a new query, we need to be sure to throw away
- * any previous occlusion query results.
- */
- brw_bo_unreference(query->bo);
- query->bo = NULL;
- query->last_index = -1;
-
- brw->query.obj = query;
-
- /* Depth statistics on Gfx4 require strange workarounds, so we try to
- * avoid them when possible. They're required for occlusion queries,
- * so turn them on now.
- */
- brw->stats_wm++;
- brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
- break;
-
- default:
- unreachable("Unrecognized query target in brw_begin_query()");
- }
-}
-
-/**
- * Gfx4-5 driver hook for glEndQuery().
- *
- * Emits GPU commands to record a final query value, ending any data capturing.
- * However, the final result isn't necessarily available until the GPU processes
- * those commands. brw_queryobj_get_results() processes the captured data to
- * produce the final result.
- */
-static void
-brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver < 6);
-
- switch (query->Base.Target) {
- case GL_TIME_ELAPSED_EXT:
- /* Write the final timestamp. */
- brw_write_timestamp(brw, query->bo, 1);
- break;
-
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- case GL_SAMPLES_PASSED_ARB:
-
- /* No query->bo means that EndQuery was called after BeginQuery with no
- * intervening drawing. Rather than doing nothing at all here in this
- * case, we emit the query_begin and query_end state to the
- * hardware. This is to guarantee that waiting on the result of this
- * empty state will cause all previous queries to complete, as
- * required by the OpenGL 4.3 (Core Profile) spec, section 4.2.1:
- *
- * "It must always be true that if any query object returns
- * a result available of TRUE, all queries of the same type
- * issued prior to that query must also return TRUE."
- */
- if (!query->bo) {
- brw_emit_query_begin(brw);
- }
-
- assert(query->bo);
-
- brw_emit_query_end(brw);
-
- brw->query.obj = NULL;
-
- brw->stats_wm--;
- brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
- break;
-
- default:
- unreachable("Unrecognized query target in brw_end_query()");
- }
-}
-
-/**
- * The Gfx4-5 WaitQuery() driver hook.
- *
- * Wait for a query result to become available and return it. This is the
- * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
- */
-static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_query_object *query = (struct brw_query_object *)q;
- UNUSED const struct intel_device_info *devinfo =
- &brw_context(ctx)->screen->devinfo;
-
- assert(devinfo->ver < 6);
-
- brw_queryobj_get_results(ctx, query);
- query->Base.Ready = true;
-}
-
-/**
- * The Gfx4-5 CheckQuery() driver hook.
- *
- * Checks whether a query result is ready yet. If not, flushes.
- * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
- */
-static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver < 6);
-
- /* From the GL_ARB_occlusion_query spec:
- *
- * "Instead of allowing for an infinite loop, performing a
- * QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
- * not ready yet on the first time it is queried. This ensures that
- * the async query will return true in finite time."
- */
- if (query->bo && brw_batch_references(&brw->batch, query->bo))
- brw_batch_flush(brw);
-
- if (query->bo == NULL || !brw_bo_busy(query->bo)) {
- brw_queryobj_get_results(ctx, query);
- query->Base.Ready = true;
- }
-}
-
-/**
- * Ensure the query's BO has enough space to store a new pair of values.
- *
- * If not, gather the existing BO's results and create a new buffer of the
- * same size.
- */
-static void
-ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query)
-{
- struct brw_context *brw = brw_context(ctx);
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(devinfo->ver < 6);
-
- if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
-
- if (query->bo != NULL) {
- /* The old query BO did not have enough space, so we allocated a new
- * one. Gather the results so far (adding up the differences) and
- * release the old BO.
- */
- brw_queryobj_get_results(ctx, query);
- }
-
- query->bo = brw_bo_alloc(brw->bufmgr, "query", 4096, BRW_MEMZONE_OTHER);
- query->last_index = 0;
- }
-}
-
-/**
- * Record the PS_DEPTH_COUNT value (for occlusion queries) just before
- * primitive drawing.
- *
- * In a pre-hardware context world, the single PS_DEPTH_COUNT register is
- * shared among all applications using the GPU. However, our query value
- * needs to only include fragments generated by our application/GL context.
- *
- * To accommodate this, we record PS_DEPTH_COUNT at the start and end of
- * each batchbuffer (technically, the first primitive drawn and flush time).
- * Subtracting each pair of values calculates the change in PS_DEPTH_COUNT
- * caused by a batchbuffer. Since there is no preemption inside batches,
- * this is guaranteed to only measure the effects of our current application.
- *
- * Adding each of these differences (in case drawing is done over many batches)
- * produces the final expected value.
- *
- * In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored
- * as part of the context state, so this is unnecessary, and skipped.
- */
-void
-brw_emit_query_begin(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_query_object *query = brw->query.obj;
-
- /* Skip if we're not doing any queries, or we've already recorded the
- * initial query value for this batchbuffer.
- */
- if (!query || brw->query.begin_emitted)
- return;
-
- ensure_bo_has_space(ctx, query);
-
- brw_write_depth_count(brw, query->bo, query->last_index * 2);
-
- brw->query.begin_emitted = true;
-}
-
-/**
- * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT
- * (for non-hardware context platforms).
- *
- * See the explanation in brw_emit_query_begin().
- */
-void
-brw_emit_query_end(struct brw_context *brw)
-{
- struct brw_query_object *query = brw->query.obj;
-
- if (!brw->query.begin_emitted)
- return;
-
- brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1);
-
- brw->query.begin_emitted = false;
- query->last_index++;
-}
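
Putting the begin/end bookkeeping together: each batch contributes one (begin, end) PS_DEPTH_COUNT pair at results[2i] and results[2i + 1], and brw_queryobj_get_results() sums the per-batch differences. A standalone sketch with invented counter values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Invented PS_DEPTH_COUNT snapshots: (begin, end) per batch. */
   uint64_t results[] = { 100, 180,     /* batch 0: 80 fragments passed  */
                          400, 400,     /* batch 1: nothing drawn        */
                          902, 1002 };  /* batch 2: 100 fragments passed */
   int last_index = 3;

   uint64_t total = 0;
   for (int i = 0; i < last_index; i++)
      total += results[i * 2 + 1] - results[i * 2];

   assert(total == 180);
   return 0;
}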
-
-/**
- * Driver hook for glQueryCounter().
- *
- * This handles GL_TIMESTAMP queries, which perform a pipelined read of the
- * current GPU time. This is unlike GL_TIME_ELAPSED, which measures the
- * time while the query is active.
- */
-void
-brw_query_counter(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *) q;
-
- assert(q->Target == GL_TIMESTAMP);
-
- brw_bo_unreference(query->bo);
- query->bo =
- brw_bo_alloc(brw->bufmgr, "timestamp query", 4096, BRW_MEMZONE_OTHER);
- brw_write_timestamp(brw, query->bo, 0);
-
- query->flushed = false;
-}
-
-/**
- * Read the TIMESTAMP register immediately (in a non-pipelined fashion).
- *
- * This is used to implement the GetTimestamp() driver hook.
- */
-static uint64_t
-brw_get_timestamp(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint64_t result = 0;
-
- switch (brw->screen->hw_has_timestamp) {
- case 3: /* New kernel, always full 36bit accuracy */
- brw_reg_read(brw->bufmgr, TIMESTAMP | 1, &result);
- break;
- case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */
- brw_reg_read(brw->bufmgr, TIMESTAMP, &result);
- result = result >> 32;
- break;
- case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */
- brw_reg_read(brw->bufmgr, TIMESTAMP, &result);
- break;
- }
-
- /* Scale to nanosecond units */
- result = intel_device_info_timebase_scale(devinfo, result);
-
- /* Ensure the scaled timestamp overflows according to
- * GL_QUERY_COUNTER_BITS. Technically this isn't required if
- * querying GL_TIMESTAMP via glGetInteger but it seems best to keep
- * QueryObject and GetInteger timestamps consistent.
- */
- result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
- return result;
-}
-
-/**
- * Is this type of query written by PIPE_CONTROL?
- */
-bool
-brw_is_query_pipelined(struct brw_query_object *query)
-{
- switch (query->Base.Target) {
- case GL_TIMESTAMP:
- case GL_TIME_ELAPSED:
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- case GL_SAMPLES_PASSED_ARB:
- return true;
-
- case GL_PRIMITIVES_GENERATED:
- case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
- case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
- case GL_VERTICES_SUBMITTED_ARB:
- case GL_PRIMITIVES_SUBMITTED_ARB:
- case GL_VERTEX_SHADER_INVOCATIONS_ARB:
- case GL_GEOMETRY_SHADER_INVOCATIONS:
- case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
- case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
- case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
- case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- return false;
-
- default:
- unreachable("Unrecognized query target in is_query_pipelined()");
- }
-}
-
-/* Initialize query object functions used on all generations. */
-void brw_init_common_queryobj_functions(struct dd_function_table *functions)
-{
- functions->NewQueryObject = brw_new_query_object;
- functions->DeleteQuery = brw_delete_query;
- functions->GetTimestamp = brw_get_timestamp;
-}
-
-/* Initialize Gfx4/5-specific query object functions. */
-void gfx4_init_queryobj_functions(struct dd_function_table *functions)
-{
- functions->BeginQuery = brw_begin_query;
- functions->EndQuery = brw_end_query;
- functions->CheckQuery = brw_check_query;
- functions->WaitQuery = brw_wait_query;
- functions->QueryCounter = brw_query_counter;
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/context.h"
-
-#include <xf86drm.h>
-#include "brw_context.h"
-
-/**
- * Query information about GPU resets observed by this context
- *
- * Called via \c dd_function_table::GetGraphicsResetStatus.
- */
-GLenum
-brw_get_graphics_reset_status(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx };
-
- /* If hardware contexts are not being used (or
- * DRM_IOCTL_I915_GET_RESET_STATS is not supported), this function should
- * not be accessible.
- */
- assert(brw->hw_ctx != 0);
-
- /* A reset status other than NO_ERROR was returned last time. I915 returns
- * nonzero active/pending only if reset has been encountered and completed.
- * Return NO_ERROR from now on.
- */
- if (brw->reset_count != 0)
- return GL_NO_ERROR;
-
- if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
- return GL_NO_ERROR;
-
- /* A reset was observed while a batch from this context was executing.
- * Assume that this context was at fault.
- */
- if (stats.batch_active != 0) {
- brw->reset_count = stats.reset_count;
- return GL_GUILTY_CONTEXT_RESET_ARB;
- }
-
- /* A reset was observed while a batch from this context was in progress,
- * but the batch was not executing. In this case, assume that the context
- * was not at fault.
- */
- if (stats.batch_pending != 0) {
- brw->reset_count = stats.reset_count;
- return GL_INNOCENT_CONTEXT_RESET_ARB;
- }
-
- return GL_NO_ERROR;
-}
-
-void
-brw_check_for_reset(struct brw_context *brw)
-{
- struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx };
-
- if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
- return;
-
- if (stats.batch_active > 0 || stats.batch_pending > 0)
- _mesa_set_context_lost_dispatch(&brw->ctx);
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "drm-uapi/drm_fourcc.h"
-#include <errno.h>
-#include <time.h>
-#include <unistd.h>
-#include "main/context.h"
-#include "main/framebuffer.h"
-#include "main/renderbuffer.h"
-#include "main/texobj.h"
-#include "main/hash.h"
-#include "main/fbobject.h"
-#include "main/version.h"
-#include "main/glthread.h"
-#include "swrast/s_renderbuffer.h"
-#include "util/ralloc.h"
-#include "util/disk_cache.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "compiler/nir/nir.h"
-
-#include "utils.h"
-#include "util/disk_cache.h"
-#include "util/driconf.h"
-#include "util/u_cpu_detect.h"
-#include "util/u_memory.h"
-
-#include "common/intel_defines.h"
-
-static const driOptionDescription brw_driconf[] = {
- DRI_CONF_SECTION_PERFORMANCE
- /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
- * DRI_CONF_BO_REUSE_ALL
- */
- DRI_CONF_OPT_E(bo_reuse, 1, 0, 1,
- "Buffer object reuse",
- DRI_CONF_ENUM(0, "Disable buffer object reuse")
- DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects"))
- DRI_CONF_MESA_NO_ERROR(false)
- DRI_CONF_MESA_GLTHREAD(false)
- DRI_CONF_SECTION_END
-
- DRI_CONF_SECTION_QUALITY
- DRI_CONF_PRECISE_TRIG(false)
-
- DRI_CONF_OPT_I(clamp_max_samples, -1, 0, 0,
- "Clamp the value of GL_MAX_SAMPLES to the "
- "given integer. If negative, then do not clamp.")
- DRI_CONF_SECTION_END
-
- DRI_CONF_SECTION_DEBUG
- DRI_CONF_ALWAYS_FLUSH_BATCH(false)
- DRI_CONF_ALWAYS_FLUSH_CACHE(false)
- DRI_CONF_DISABLE_THROTTLING(false)
- DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(false)
- DRI_CONF_FORCE_GLSL_VERSION(0)
- DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(false)
- DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED(false)
- DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
- DRI_CONF_ALLOW_EXTRA_PP_TOKENS(false)
- DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER(false)
- DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION(false)
- DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH(false)
- DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION(false)
- DRI_CONF_FORCE_COMPAT_PROFILE(false)
- DRI_CONF_FORCE_GLSL_ABS_SQRT(false)
- DRI_CONF_FORCE_GL_VENDOR()
-
- DRI_CONF_OPT_B(shader_precompile, true, "Perform code generation at shader link time.")
- DRI_CONF_SECTION_END
-
- DRI_CONF_SECTION_MISCELLANEOUS
- DRI_CONF_GLSL_ZERO_INIT(false)
- DRI_CONF_VS_POSITION_ALWAYS_INVARIANT(false)
- DRI_CONF_VS_POSITION_ALWAYS_PRECISE(false)
- DRI_CONF_ALLOW_RGB10_CONFIGS(false)
- DRI_CONF_ALLOW_RGB565_CONFIGS(true)
- DRI_CONF_SECTION_END
-};
-
-static char *
-brw_driconf_get_xml(UNUSED const char *driver_name)
-{
- return driGetOptionsXml(brw_driconf, ARRAY_SIZE(brw_driconf));
-}
-
-static const __DRIconfigOptionsExtension brw_config_options = {
- .base = { __DRI_CONFIG_OPTIONS, 2 },
- .xml = NULL,
- .getXml = brw_driconf_get_xml,
-};
-
-#include "brw_batch.h"
-#include "brw_buffers.h"
-#include "brw_bufmgr.h"
-#include "brw_fbo.h"
-#include "brw_mipmap_tree.h"
-#include "brw_screen.h"
-#include "brw_tex.h"
-#include "brw_image.h"
-
-#include "brw_context.h"
-
-#include "drm-uapi/i915_drm.h"
-
-/**
- * For debugging purposes, this returns a time in seconds.
- */
-double
-get_time(void)
-{
- struct timespec tp;
-
- clock_gettime(CLOCK_MONOTONIC, &tp);
-
- return tp.tv_sec + tp.tv_nsec / 1000000000.0;
-}
-
-static const __DRItexBufferExtension brwTexBufferExtension = {
- .base = { __DRI_TEX_BUFFER, 3 },
-
- .setTexBuffer = brw_set_texbuffer,
- .setTexBuffer2 = brw_set_texbuffer2,
- .releaseTexBuffer = brw_release_texbuffer,
-};
-
-static void
-brw_dri2_flush_with_flags(__DRIcontext *cPriv,
- __DRIdrawable *dPriv,
- unsigned flags,
- enum __DRI2throttleReason reason)
-{
- struct brw_context *brw = cPriv->driverPrivate;
-
- if (!brw)
- return;
-
- struct gl_context *ctx = &brw->ctx;
-
- _mesa_glthread_finish(ctx);
-
- FLUSH_VERTICES(ctx, 0, 0);
-
- if (flags & __DRI2_FLUSH_DRAWABLE)
- brw_resolve_for_dri2_flush(brw, dPriv);
-
- if (reason == __DRI2_THROTTLE_SWAPBUFFER)
- brw->need_swap_throttle = true;
- if (reason == __DRI2_THROTTLE_FLUSHFRONT)
- brw->need_flush_throttle = true;
-
- brw_batch_flush(brw);
-}
-
-/**
- * Provides compatibility with loaders that only support the older (version
- * 1-3) flush interface.
- *
- * That includes libGL up to Mesa 9.0, and the X Server at least up to 1.13.
- */
-static void
-brw_dri2_flush(__DRIdrawable *drawable)
-{
- brw_dri2_flush_with_flags(drawable->driContextPriv, drawable,
- __DRI2_FLUSH_DRAWABLE,
- __DRI2_THROTTLE_SWAPBUFFER);
-}
-
-static const struct __DRI2flushExtensionRec brwFlushExtension = {
- .base = { __DRI2_FLUSH, 4 },
-
- .flush = brw_dri2_flush,
- .invalidate = dri2InvalidateDrawable,
- .flush_with_flags = brw_dri2_flush_with_flags,
-};
-
-static const struct brw_image_format brw_image_formats[] = {
- { DRM_FORMAT_ABGR16161616F, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR16161616F, 8 } } },
-
- { DRM_FORMAT_XBGR16161616F, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR16161616F, 8 } } },
-
- { DRM_FORMAT_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB2101010, 4 } } },
-
- { DRM_FORMAT_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } },
-
- { DRM_FORMAT_ABGR2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR2101010, 4 } } },
-
- { DRM_FORMAT_XBGR2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR2101010, 4 } } },
-
- { DRM_FORMAT_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },
-
- { DRM_FORMAT_ABGR8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } },
-
- { __DRI_IMAGE_FOURCC_SARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } },
-
- { __DRI_IMAGE_FOURCC_SXRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_SXRGB8, 4 } } },
-
- { DRM_FORMAT_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } },
-
- { DRM_FORMAT_XBGR8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 }, } },
-
- { DRM_FORMAT_ARGB1555, __DRI_IMAGE_COMPONENTS_RGBA, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB1555, 2 } } },
-
- { DRM_FORMAT_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },
-
- { DRM_FORMAT_R8, __DRI_IMAGE_COMPONENTS_R, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
-
- { DRM_FORMAT_R16, __DRI_IMAGE_COMPONENTS_R, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 1 }, } },
-
- { DRM_FORMAT_GR88, __DRI_IMAGE_COMPONENTS_RG, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } },
-
- { DRM_FORMAT_GR1616, __DRI_IMAGE_COMPONENTS_RG, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR1616, 2 }, } },
-
- { DRM_FORMAT_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YVU410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YVU411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YVU420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YVU422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_YVU444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
- { DRM_FORMAT_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
-
- { DRM_FORMAT_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
- { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
- { DRM_FORMAT_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
- { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
- { DRM_FORMAT_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
- { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
- { DRM_FORMAT_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
- { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
-
- { DRM_FORMAT_AYUV, __DRI_IMAGE_COMPONENTS_AYUV, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } },
-
- { DRM_FORMAT_XYUV8888, __DRI_IMAGE_COMPONENTS_XYUV, 1,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 } } },
-
- /* For YUYV and UYVY buffers, we set up two overlapping DRI images
- * and treat them as planar buffers in the compositors.
- * Plane 0 is GR88 and samples YU or YV pairs and places Y into
- * the R component, while plane 1 is ARGB/ABGR and samples YUYV/UYVY
- * clusters and places U into the G component and
- * V into A. This lets the texture sampler interpolate the Y
- * components correctly when sampling from plane 0, and interpolate
- * U and V correctly when sampling from plane 1. */
- { DRM_FORMAT_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
- { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },
- { DRM_FORMAT_UYVY, __DRI_IMAGE_COMPONENTS_Y_UXVX, 2,
- { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
- { 0, 1, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } }
-};
-
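(Editor's note: a small worked example, not part of the removed file, to make the plane table above concrete; it uses the buffer_index/width_shift/height_shift values of the DRM_FORMAT_NV12 entry, {0,0,0} and {1,1,1}.)

/* Worked example (editorial): a 1920x1080 DRM_FORMAT_NV12 image resolves to
 *   plane 0: 1920 x 1080, __DRI_IMAGE_FORMAT_R8,   1 byte/pixel  (full-res Y)
 *   plane 1:  960 x  540, __DRI_IMAGE_FORMAT_GR88, 2 bytes/pixel (interleaved UV;
 *            width and height are each shifted right by 1)
 */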
-static const struct {
- uint64_t modifier;
- unsigned since_ver;
-} supported_modifiers[] = {
- { .modifier = DRM_FORMAT_MOD_LINEAR , .since_ver = 1 },
- { .modifier = I915_FORMAT_MOD_X_TILED , .since_ver = 1 },
- { .modifier = I915_FORMAT_MOD_Y_TILED , .since_ver = 6 },
- { .modifier = I915_FORMAT_MOD_Y_TILED_CCS , .since_ver = 9 },
-};
-
-static bool
-modifier_is_supported(const struct intel_device_info *devinfo,
- const struct brw_image_format *fmt, int dri_format,
- unsigned use, uint64_t modifier)
-{
- const struct isl_drm_modifier_info *modinfo =
- isl_drm_modifier_get_info(modifier);
- int i;
-
- /* ISL had better know about the modifier */
- if (!modinfo)
- return false;
-
- if (devinfo->ver < 9 && (use & __DRI_IMAGE_USE_SCANOUT) &&
- !(modinfo->tiling == ISL_TILING_LINEAR ||
- modinfo->tiling == ISL_TILING_X))
- return false;
-
- if (modinfo->aux_usage == ISL_AUX_USAGE_CCS_E) {
- /* If INTEL_DEBUG=norbc is set, don't support any CCS_E modifiers */
- if (INTEL_DEBUG(DEBUG_NO_RBC))
- return false;
-
- /* CCS_E is not supported for planar images */
- if (fmt && fmt->nplanes > 1)
- return false;
-
- if (fmt) {
- assert(dri_format == 0);
- dri_format = fmt->planes[0].dri_format;
- }
-
- mesa_format format = driImageFormatToGLFormat(dri_format);
- /* Whether or not we support compression is based on the RGBA non-sRGB
- * version of the format.
- */
- format = _mesa_format_fallback_rgbx_to_rgba(format);
- format = _mesa_get_srgb_format_linear(format);
- if (!isl_format_supports_ccs_e(devinfo,
- brw_isl_format_for_mesa_format(format)))
- return false;
- }
-
- for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) {
- if (supported_modifiers[i].modifier != modifier)
- continue;
-
- return supported_modifiers[i].since_ver <= devinfo->ver;
- }
-
- return false;
-}
-
-static uint64_t
-tiling_to_modifier(uint32_t tiling)
-{
- static const uint64_t map[] = {
- [I915_TILING_NONE] = DRM_FORMAT_MOD_LINEAR,
- [I915_TILING_X] = I915_FORMAT_MOD_X_TILED,
- [I915_TILING_Y] = I915_FORMAT_MOD_Y_TILED,
- };
-
- assert(tiling < ARRAY_SIZE(map));
-
- return map[tiling];
-}
-
-static void
-brw_image_warn_if_unaligned(__DRIimage *image, const char *func)
-{
- uint32_t tiling, swizzle;
- brw_bo_get_tiling(image->bo, &tiling, &swizzle);
-
- if (tiling != I915_TILING_NONE && (image->offset & 0xfff)) {
- _mesa_warning(NULL, "%s: offset 0x%08x not on tile boundary",
- func, image->offset);
- }
-}
-
-static const struct brw_image_format *
-brw_image_format_lookup(int fourcc)
-{
- for (unsigned i = 0; i < ARRAY_SIZE(brw_image_formats); i++) {
- if (brw_image_formats[i].fourcc == fourcc)
- return &brw_image_formats[i];
- }
-
- return NULL;
-}
-
-static bool
-brw_image_get_fourcc(__DRIimage *image, int *fourcc)
-{
- if (image->planar_format) {
- *fourcc = image->planar_format->fourcc;
- return true;
- }
-
- for (unsigned i = 0; i < ARRAY_SIZE(brw_image_formats); i++) {
- if (brw_image_formats[i].planes[0].dri_format == image->dri_format) {
- *fourcc = brw_image_formats[i].fourcc;
- return true;
- }
- }
- return false;
-}
-
-static __DRIimage *
-brw_allocate_image(struct brw_screen *screen, int dri_format,
- void *loaderPrivate)
-{
- __DRIimage *image;
-
- image = calloc(1, sizeof *image);
- if (image == NULL)
- return NULL;
-
- image->screen = screen;
- image->dri_format = dri_format;
- image->offset = 0;
-
- image->format = driImageFormatToGLFormat(dri_format);
- if (dri_format != __DRI_IMAGE_FORMAT_NONE &&
- image->format == MESA_FORMAT_NONE) {
- free(image);
- return NULL;
- }
-
- image->internal_format = _mesa_get_format_base_format(image->format);
- image->driScrnPriv = screen->driScrnPriv;
- image->loader_private = loaderPrivate;
-
- return image;
-}
-
-/**
- * Sets up a DRIImage structure to point to a slice out of a miptree.
- */
-static void
-brw_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image,
- struct brw_mipmap_tree *mt, GLuint level,
- GLuint zoffset)
-{
- brw_miptree_make_shareable(brw, mt);
-
- brw_miptree_check_level_layer(mt, level, zoffset);
-
- image->width = minify(mt->surf.phys_level0_sa.width,
- level - mt->first_level);
- image->height = minify(mt->surf.phys_level0_sa.height,
- level - mt->first_level);
- image->pitch = mt->surf.row_pitch_B;
-
- image->offset = brw_miptree_get_tile_offsets(mt, level, zoffset,
- &image->tile_x,
- &image->tile_y);
-
- brw_bo_unreference(image->bo);
- image->bo = mt->bo;
- brw_bo_reference(mt->bo);
-}
-
-static __DRIimage *
-brw_create_image_from_name(__DRIscreen *dri_screen,
- int width, int height, int format,
- int name, int pitch, void *loaderPrivate)
-{
- struct brw_screen *screen = dri_screen->driverPrivate;
- __DRIimage *image;
- int cpp;
-
- image = brw_allocate_image(screen, format, loaderPrivate);
- if (image == NULL)
- return NULL;
-
- if (image->format == MESA_FORMAT_NONE)
- cpp = 1;
- else
- cpp = _mesa_get_format_bytes(image->format);
-
- image->width = width;
- image->height = height;
- image->pitch = pitch * cpp;
- image->bo = brw_bo_gem_create_from_name(screen->bufmgr, "image",
- name);
- if (!image->bo) {
- free(image);
- return NULL;
- }
- image->modifier = tiling_to_modifier(image->bo->tiling_mode);
-
- return image;
-}
-
-static __DRIimage *
-brw_create_image_from_renderbuffer(__DRIcontext *context,
- int renderbuffer, void *loaderPrivate)
-{
- __DRIimage *image;
- struct brw_context *brw = context->driverPrivate;
- struct gl_context *ctx = &brw->ctx;
- struct gl_renderbuffer *rb;
- struct brw_renderbuffer *irb;
-
- rb = _mesa_lookup_renderbuffer(ctx, renderbuffer);
- if (!rb) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
- return NULL;
- }
-
- irb = brw_renderbuffer(rb);
- brw_miptree_make_shareable(brw, irb->mt);
- image = calloc(1, sizeof *image);
- if (image == NULL)
- return NULL;
-
- image->internal_format = rb->InternalFormat;
- image->format = rb->Format;
- image->modifier = tiling_to_modifier(
- isl_tiling_to_i915_tiling(irb->mt->surf.tiling));
- image->offset = 0;
- image->driScrnPriv = context->driScreenPriv;
- image->loader_private = loaderPrivate;
- brw_bo_unreference(image->bo);
- image->bo = irb->mt->bo;
- brw_bo_reference(irb->mt->bo);
- image->width = rb->Width;
- image->height = rb->Height;
- image->pitch = irb->mt->surf.row_pitch_B;
- image->dri_format = driGLFormatToImageFormat(image->format);
- image->has_depthstencil = irb->mt->stencil_mt? true : false;
-
- rb->NeedsFinishRenderTexture = true;
- return image;
-}
-
-static __DRIimage *
-brw_create_image_from_texture(__DRIcontext *context, int target,
- unsigned texture, int zoffset,
- int level,
- unsigned *error,
- void *loaderPrivate)
-{
- __DRIimage *image;
- struct brw_context *brw = context->driverPrivate;
- struct gl_texture_object *obj;
- struct brw_texture_object *iobj;
- GLuint face = 0;
-
- obj = _mesa_lookup_texture(&brw->ctx, texture);
- if (!obj || obj->Target != target) {
- *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
- return NULL;
- }
-
- if (target == GL_TEXTURE_CUBE_MAP)
- face = zoffset;
-
- _mesa_test_texobj_completeness(&brw->ctx, obj);
- iobj = brw_texture_object(obj);
- if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) {
- *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
- return NULL;
- }
-
- if (level < obj->Attrib.BaseLevel || level > obj->_MaxLevel) {
- *error = __DRI_IMAGE_ERROR_BAD_MATCH;
- return NULL;
- }
-
- if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < zoffset) {
- *error = __DRI_IMAGE_ERROR_BAD_MATCH;
- return NULL;
- }
- image = calloc(1, sizeof *image);
- if (image == NULL) {
- *error = __DRI_IMAGE_ERROR_BAD_ALLOC;
- return NULL;
- }
-
- image->internal_format = obj->Image[face][level]->InternalFormat;
- image->format = obj->Image[face][level]->TexFormat;
- image->modifier = tiling_to_modifier(
- isl_tiling_to_i915_tiling(iobj->mt->surf.tiling));
- image->driScrnPriv = context->driScreenPriv;
- image->loader_private = loaderPrivate;
- brw_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
- image->dri_format = driGLFormatToImageFormat(image->format);
- image->has_depthstencil = iobj->mt->stencil_mt? true : false;
- image->planar_format = iobj->planar_format;
- if (image->dri_format == __DRI_IMAGE_FORMAT_NONE) {
- *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
- free(image);
- return NULL;
- }
-
- *error = __DRI_IMAGE_ERROR_SUCCESS;
- return image;
-}
-
-static void
-brw_destroy_image(__DRIimage *image)
-{
- const __DRIscreen * driScreen = image->driScrnPriv;
- const __DRIimageLoaderExtension *imgLoader = driScreen->image.loader;
- const __DRIdri2LoaderExtension *dri2Loader = driScreen->dri2.loader;
-
- if (imgLoader && imgLoader->base.version >= 4 &&
- imgLoader->destroyLoaderImageState) {
- imgLoader->destroyLoaderImageState(image->loader_private);
- } else if (dri2Loader && dri2Loader->base.version >= 5 &&
- dri2Loader->destroyLoaderImageState) {
- dri2Loader->destroyLoaderImageState(image->loader_private);
- }
-
- brw_bo_unreference(image->bo);
- free(image);
-}
-
-enum modifier_priority {
- MODIFIER_PRIORITY_INVALID = 0,
- MODIFIER_PRIORITY_LINEAR,
- MODIFIER_PRIORITY_X,
- MODIFIER_PRIORITY_Y,
- MODIFIER_PRIORITY_Y_CCS,
-};
-
-const uint64_t priority_to_modifier[] = {
- [MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID,
- [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
- [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED,
- [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED,
- [MODIFIER_PRIORITY_Y_CCS] = I915_FORMAT_MOD_Y_TILED_CCS,
-};
-
-static uint64_t
-select_best_modifier(struct intel_device_info *devinfo,
- int dri_format,
- unsigned use,
- const uint64_t *modifiers,
- const unsigned count)
-{
- enum modifier_priority prio = MODIFIER_PRIORITY_INVALID;
-
- for (int i = 0; i < count; i++) {
- if (!modifier_is_supported(devinfo, NULL, dri_format, use, modifiers[i]))
- continue;
-
- switch (modifiers[i]) {
- case I915_FORMAT_MOD_Y_TILED_CCS:
- prio = MAX2(prio, MODIFIER_PRIORITY_Y_CCS);
- break;
- case I915_FORMAT_MOD_Y_TILED:
- prio = MAX2(prio, MODIFIER_PRIORITY_Y);
- break;
- case I915_FORMAT_MOD_X_TILED:
- prio = MAX2(prio, MODIFIER_PRIORITY_X);
- break;
- case DRM_FORMAT_MOD_LINEAR:
- prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
- break;
- case DRM_FORMAT_MOD_INVALID:
- default:
- break;
- }
- }
-
- return priority_to_modifier[prio];
-}
-
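(Editor's note: a hedged usage sketch, not from the removed file, showing how the priority table above resolves a loader-advertised modifier list; the example function name and the chosen format/use flags are illustrative.)

static uint64_t
example_pick_modifier(struct brw_screen *screen)
{
   static const uint64_t loader_mods[] = {
      DRM_FORMAT_MOD_LINEAR,
      I915_FORMAT_MOD_X_TILED,
      I915_FORMAT_MOD_Y_TILED,
      I915_FORMAT_MOD_Y_TILED_CCS,
   };

   /* Keeps only entries accepted by modifier_is_supported() and returns the
    * highest-priority one (Y-tiled+CCS on Gen9+ when the format allows CCS_E),
    * or DRM_FORMAT_MOD_INVALID if nothing in the list is usable. */
   return select_best_modifier(&screen->devinfo, __DRI_IMAGE_FORMAT_ARGB8888,
                               0 /* use flags */, loader_mods,
                               ARRAY_SIZE(loader_mods));
}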
-static __DRIimage *
-brw_create_image_common(__DRIscreen *dri_screen,
- int width, int height, int format,
- unsigned int use,
- const uint64_t *modifiers,
- unsigned count,
- void *loaderPrivate)
-{
- __DRIimage *image;
- struct brw_screen *screen = dri_screen->driverPrivate;
- uint64_t modifier = DRM_FORMAT_MOD_INVALID;
- bool ok;
-
- if (use & __DRI_IMAGE_USE_CURSOR) {
- if (width != 64 || height != 64)
- return NULL;
- modifier = DRM_FORMAT_MOD_LINEAR;
- }
-
- if (use & __DRI_IMAGE_USE_LINEAR)
- modifier = DRM_FORMAT_MOD_LINEAR;
-
- if (modifier == DRM_FORMAT_MOD_INVALID) {
- if (modifiers) {
- /* User requested specific modifiers */
- modifier = select_best_modifier(&screen->devinfo, format, use,
- modifiers, count);
- if (modifier == DRM_FORMAT_MOD_INVALID)
- return NULL;
- } else {
- /* Historically, X-tiled was the default, and so lack of modifier means
- * X-tiled.
- */
- modifier = I915_FORMAT_MOD_X_TILED;
- }
- }
-
- image = brw_allocate_image(screen, format, loaderPrivate);
- if (image == NULL)
- return NULL;
-
- const struct isl_drm_modifier_info *mod_info =
- isl_drm_modifier_get_info(modifier);
-
- struct isl_surf surf;
- ok = isl_surf_init(&screen->isl_dev, &surf,
- .dim = ISL_SURF_DIM_2D,
- .format = brw_isl_format_for_mesa_format(image->format),
- .width = width,
- .height = height,
- .depth = 1,
- .levels = 1,
- .array_len = 1,
- .samples = 1,
- .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT |
- ISL_SURF_USAGE_TEXTURE_BIT |
- ISL_SURF_USAGE_STORAGE_BIT |
- ((use & __DRI_IMAGE_USE_SCANOUT) ?
- ISL_SURF_USAGE_DISPLAY_BIT : 0),
- .tiling_flags = (1 << mod_info->tiling));
- assert(ok);
- if (!ok) {
- free(image);
- return NULL;
- }
-
- struct isl_surf aux_surf = {0,};
- if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) {
- ok = isl_surf_get_ccs_surf(&screen->isl_dev, &surf, NULL, &aux_surf, 0);
- if (!ok) {
- free(image);
- return NULL;
- }
- } else {
- assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE);
- aux_surf.size_B = 0;
- }
-
- /* We request that the bufmgr zero the buffer for us for two reasons:
- *
- * 1) If a buffer gets re-used from the pool, we don't want to leak random
- * garbage from our process to some other.
- *
- * 2) For images with CCS_E, we want to ensure that the CCS starts off in
- * a valid state. A CCS value of 0 indicates that the given block is
- * in the pass-through state which is what we want.
- */
- image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image",
- surf.size_B + aux_surf.size_B,
- BRW_MEMZONE_OTHER,
- isl_tiling_to_i915_tiling(mod_info->tiling),
- surf.row_pitch_B, BO_ALLOC_ZEROED);
- if (image->bo == NULL) {
- free(image);
- return NULL;
- }
- image->width = width;
- image->height = height;
- image->pitch = surf.row_pitch_B;
- image->modifier = modifier;
-
- if (aux_surf.size_B) {
- image->aux_offset = surf.size_B;
- image->aux_pitch = aux_surf.row_pitch_B;
- image->aux_size = aux_surf.size_B;
- }
-
- return image;
-}
-
-static __DRIimage *
-brw_create_image(__DRIscreen *dri_screen,
- int width, int height, int format,
- unsigned int use,
- void *loaderPrivate)
-{
- return brw_create_image_common(dri_screen, width, height, format, use,
- NULL, 0, loaderPrivate);
-}
-
-static void *
-brw_map_image(__DRIcontext *context, __DRIimage *image,
- int x0, int y0, int width, int height,
- unsigned int flags, int *stride, void **map_info)
-{
- struct brw_context *brw = NULL;
- struct brw_bo *bo = NULL;
- void *raw_data = NULL;
- GLuint pix_w = 1;
- GLuint pix_h = 1;
- GLint pix_bytes = 1;
-
- if (!context || !image || !stride || !map_info || *map_info)
- return NULL;
-
- if (x0 < 0 || x0 >= image->width || width > image->width - x0)
- return NULL;
-
- if (y0 < 0 || y0 >= image->height || height > image->height - y0)
- return NULL;
-
- if (flags & MAP_INTERNAL_MASK)
- return NULL;
-
- brw = context->driverPrivate;
- bo = image->bo;
-
- assert(brw);
- assert(bo);
-
- /* DRI flags and GL_MAP.*_BIT flags are the same, so just pass them on. */
- raw_data = brw_bo_map(brw, bo, flags);
- if (!raw_data)
- return NULL;
-
- _mesa_get_format_block_size(image->format, &pix_w, &pix_h);
- pix_bytes = _mesa_get_format_bytes(image->format);
-
- assert(pix_w);
- assert(pix_h);
- assert(pix_bytes > 0);
-
- raw_data += (x0 / pix_w) * pix_bytes + (y0 / pix_h) * image->pitch;
-
- brw_bo_reference(bo);
-
- *stride = image->pitch;
- *map_info = bo;
-
- return raw_data;
-}
-
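(Editor's note: an illustrative arithmetic example, not from the removed file, for the sub-rectangle offset computed near the end of brw_map_image() above; the format and the numbers are assumed.)

/* Editorial example: for a 4-byte-per-pixel format with a 1x1 block size and a
 * 4096-byte row pitch, mapping the sub-rectangle starting at (x0 = 16, y0 = 8)
 * advances the returned pointer by
 *   (16 / 1) * 4 + (8 / 1) * 4096 = 64 + 32768 = 32832 bytes
 * past the start of the BO mapping, while *stride still reports the full pitch.
 */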
-static void
-brw_unmap_image(UNUSED __DRIcontext *context, UNUSED __DRIimage *image,
- void *map_info)
-{
- struct brw_bo *bo = map_info;
-
- brw_bo_unmap(bo);
- brw_bo_unreference(bo);
-}
-
-static __DRIimage *
-brw_create_image_with_modifiers(__DRIscreen *dri_screen,
- int width, int height, int format,
- const uint64_t *modifiers,
- const unsigned count,
- void *loaderPrivate)
-{
- return brw_create_image_common(dri_screen, width, height, format, 0,
- modifiers, count, loaderPrivate);
-}
-
-static __DRIimage *
-brw_create_image_with_modifiers2(__DRIscreen *dri_screen,
- int width, int height, int format,
- const uint64_t *modifiers,
- const unsigned count, unsigned int use,
- void *loaderPrivate)
-{
- return brw_create_image_common(dri_screen, width, height, format, use,
- modifiers, count, loaderPrivate);
-}
-
-static GLboolean
-brw_query_image(__DRIimage *image, int attrib, int *value)
-{
- switch (attrib) {
- case __DRI_IMAGE_ATTRIB_STRIDE:
- *value = image->pitch;
- return true;
- case __DRI_IMAGE_ATTRIB_HANDLE: {
- __DRIscreen *dri_screen = image->screen->driScrnPriv;
- uint32_t handle;
- if (brw_bo_export_gem_handle_for_device(image->bo,
- dri_screen->fd,
- &handle))
- return false;
- *value = handle;
- return true;
- }
- case __DRI_IMAGE_ATTRIB_NAME:
- return !brw_bo_flink(image->bo, (uint32_t *) value);
- case __DRI_IMAGE_ATTRIB_FORMAT:
- *value = image->dri_format;
- return true;
- case __DRI_IMAGE_ATTRIB_WIDTH:
- *value = image->width;
- return true;
- case __DRI_IMAGE_ATTRIB_HEIGHT:
- *value = image->height;
- return true;
- case __DRI_IMAGE_ATTRIB_COMPONENTS:
- if (image->planar_format == NULL)
- return false;
- *value = image->planar_format->components;
- return true;
- case __DRI_IMAGE_ATTRIB_FD:
- return !brw_bo_gem_export_to_prime(image->bo, value);
- case __DRI_IMAGE_ATTRIB_FOURCC:
- return brw_image_get_fourcc(image, value);
- case __DRI_IMAGE_ATTRIB_NUM_PLANES:
- if (isl_drm_modifier_has_aux(image->modifier)) {
- assert(!image->planar_format || image->planar_format->nplanes == 1);
- *value = 2;
- } else if (image->planar_format) {
- *value = image->planar_format->nplanes;
- } else {
- *value = 1;
- }
- return true;
- case __DRI_IMAGE_ATTRIB_OFFSET:
- *value = image->offset;
- return true;
- case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER:
- *value = (image->modifier & 0xffffffff);
- return true;
- case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER:
- *value = ((image->modifier >> 32) & 0xffffffff);
- return true;
-
- default:
- return false;
- }
-}
-
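(Editor's note: __DRI_IMAGE_ATTRIB_MODIFIER_LOWER/UPPER above split the 64-bit DRM modifier into two 32-bit attribute values; this hedged sketch, not from the removed file, shows how a caller reassembles it. The "image" variable is assumed to be a valid __DRIimage.)

int lower = 0, upper = 0;
if (brw_query_image(image, __DRI_IMAGE_ATTRIB_MODIFIER_LOWER, &lower) &&
    brw_query_image(image, __DRI_IMAGE_ATTRIB_MODIFIER_UPPER, &upper)) {
   /* Recombine the two halves into the original 64-bit format modifier. */
   uint64_t modifier = ((uint64_t)(uint32_t)upper << 32) | (uint32_t)lower;
   /* modifier == image->modifier */
}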
-static GLboolean
-brw_query_format_modifier_attribs(__DRIscreen *dri_screen,
- uint32_t fourcc, uint64_t modifier,
- int attrib, uint64_t *value)
-{
- struct brw_screen *screen = dri_screen->driverPrivate;
- const struct brw_image_format *f = brw_image_format_lookup(fourcc);
-
- if (!modifier_is_supported(&screen->devinfo, f, 0, 0, modifier))
- return false;
-
- switch (attrib) {
- case __DRI_IMAGE_FORMAT_MODIFIER_ATTRIB_PLANE_COUNT:
- *value = isl_drm_modifier_has_aux(modifier) ? 2 : f->nplanes;
- return true;
-
- default:
- return false;
- }
-}
-
-static __DRIimage *
-brw_dup_image(__DRIimage *orig_image, void *loaderPrivate)
-{
- __DRIimage *image;
-
- image = calloc(1, sizeof *image);
- if (image == NULL)
- return NULL;
-
- brw_bo_reference(orig_image->bo);
- image->screen = orig_image->screen;
- image->bo = orig_image->bo;
- image->internal_format = orig_image->internal_format;
- image->planar_format = orig_image->planar_format;
- image->dri_format = orig_image->dri_format;
- image->format = orig_image->format;
- image->modifier = orig_image->modifier;
- image->offset = orig_image->offset;
- image->width = orig_image->width;
- image->height = orig_image->height;
- image->pitch = orig_image->pitch;
- image->tile_x = orig_image->tile_x;
- image->tile_y = orig_image->tile_y;
- image->has_depthstencil = orig_image->has_depthstencil;
- image->driScrnPriv = orig_image->driScrnPriv;
- image->loader_private = loaderPrivate;
- image->aux_offset = orig_image->aux_offset;
- image->aux_pitch = orig_image->aux_pitch;
-
- memcpy(image->strides, orig_image->strides, sizeof(image->strides));
- memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets));
-
- return image;
-}
-
-static GLboolean
-brw_validate_usage(__DRIimage *image, unsigned int use)
-{
- if (use & __DRI_IMAGE_USE_CURSOR) {
- if (image->width != 64 || image->height != 64)
- return GL_FALSE;
- }
-
- return GL_TRUE;
-}
-
-static __DRIimage *
-brw_create_image_from_names(__DRIscreen *dri_screen,
- int width, int height, int fourcc,
- int *names, int num_names,
- int *strides, int *offsets,
- void *loaderPrivate)
-{
- const struct brw_image_format *f = NULL;
- __DRIimage *image;
- int i, index;
-
- if (dri_screen == NULL || names == NULL || num_names != 1)
- return NULL;
-
- f = brw_image_format_lookup(fourcc);
- if (f == NULL)
- return NULL;
-
- image = brw_create_image_from_name(dri_screen, width, height,
- __DRI_IMAGE_FORMAT_NONE,
- names[0], strides[0],
- loaderPrivate);
-
- if (image == NULL)
- return NULL;
-
- image->planar_format = f;
- for (i = 0; i < f->nplanes; i++) {
- index = f->planes[i].buffer_index;
- image->offsets[index] = offsets[index];
- image->strides[index] = strides[index];
- }
-
- return image;
-}
-
-static __DRIimage *
-brw_create_image_from_fds_common(__DRIscreen *dri_screen,
- int width, int height, int fourcc,
- uint64_t modifier, int *fds, int num_fds,
- int *strides, int *offsets,
- void *loaderPrivate)
-{
- struct brw_screen *screen = dri_screen->driverPrivate;
- const struct brw_image_format *f;
- __DRIimage *image;
- int i, index;
- bool ok;
-
- if (fds == NULL || num_fds < 1)
- return NULL;
-
- f = brw_image_format_lookup(fourcc);
- if (f == NULL)
- return NULL;
-
- if (modifier != DRM_FORMAT_MOD_INVALID &&
- !modifier_is_supported(&screen->devinfo, f, 0, 0, modifier))
- return NULL;
-
- if (f->nplanes == 1)
- image = brw_allocate_image(screen, f->planes[0].dri_format,
- loaderPrivate);
- else
- image = brw_allocate_image(screen, __DRI_IMAGE_FORMAT_NONE,
- loaderPrivate);
-
- if (image == NULL)
- return NULL;
-
- image->width = width;
- image->height = height;
- image->pitch = strides[0];
-
- image->planar_format = f;
-
- if (modifier != DRM_FORMAT_MOD_INVALID) {
- const struct isl_drm_modifier_info *mod_info =
- isl_drm_modifier_get_info(modifier);
- uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling);
- image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr, fds[0],
- tiling, strides[0]);
- } else {
- image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
- }
-
- if (image->bo == NULL) {
- free(image);
- return NULL;
- }
-
- /* We only support all planes from the same bo.
- * brw_bo_gem_create_from_prime() should return the same pointer for all
- * fds received here */
- for (i = 1; i < num_fds; i++) {
- struct brw_bo *aux = brw_bo_gem_create_from_prime(screen->bufmgr, fds[i]);
- brw_bo_unreference(aux);
- if (aux != image->bo) {
- brw_bo_unreference(image->bo);
- free(image);
- return NULL;
- }
- }
-
- if (modifier != DRM_FORMAT_MOD_INVALID)
- image->modifier = modifier;
- else
- image->modifier = tiling_to_modifier(image->bo->tiling_mode);
-
- const struct isl_drm_modifier_info *mod_info =
- isl_drm_modifier_get_info(image->modifier);
-
- int size = 0;
- struct isl_surf surf;
- for (i = 0; i < f->nplanes; i++) {
- index = f->planes[i].buffer_index;
- image->offsets[index] = offsets[index];
- image->strides[index] = strides[index];
-
- mesa_format format = driImageFormatToGLFormat(f->planes[i].dri_format);
- /* The images we will create are actually based on the RGBA non-sRGB
- * version of the format.
- */
- format = _mesa_format_fallback_rgbx_to_rgba(format);
- format = _mesa_get_srgb_format_linear(format);
-
- ok = isl_surf_init(&screen->isl_dev, &surf,
- .dim = ISL_SURF_DIM_2D,
- .format = brw_isl_format_for_mesa_format(format),
- .width = image->width >> f->planes[i].width_shift,
- .height = image->height >> f->planes[i].height_shift,
- .depth = 1,
- .levels = 1,
- .array_len = 1,
- .samples = 1,
- .row_pitch_B = strides[index],
- .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT |
- ISL_SURF_USAGE_TEXTURE_BIT |
- ISL_SURF_USAGE_STORAGE_BIT,
- .tiling_flags = (1 << mod_info->tiling));
- if (!ok) {
- brw_bo_unreference(image->bo);
- free(image);
- return NULL;
- }
-
- const int end = offsets[index] + surf.size_B;
- if (size < end)
- size = end;
- }
-
- if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) {
- /* Even though we initialize surf in the loop above, we know that
- * anything with CCS_E will have exactly one plane so surf is properly
- * initialized when we get here.
- */
- assert(f->nplanes == 1);
-
- image->aux_offset = offsets[1];
- image->aux_pitch = strides[1];
-
- /* Scanout hardware requires that the CCS be placed after the main
- * surface in memory. We consider any CCS that is placed any earlier in
- * memory to be invalid and reject it.
- *
- * At some point in the future, this restriction may be relaxed if the
- * hardware becomes less strict but we may need a new modifier for that.
- */
- assert(size > 0);
- if (image->aux_offset < size) {
- brw_bo_unreference(image->bo);
- free(image);
- return NULL;
- }
-
- struct isl_surf aux_surf = {0,};
- ok = isl_surf_get_ccs_surf(&screen->isl_dev, &surf, NULL, &aux_surf,
- image->aux_pitch);
- if (!ok) {
- brw_bo_unreference(image->bo);
- free(image);
- return NULL;
- }
-
- image->aux_size = aux_surf.size_B;
-
- const int end = image->aux_offset + aux_surf.size_B;
- if (size < end)
- size = end;
- } else {
- assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE);
- }
-
- /* Check that the requested image actually fits within the BO. 'size'
- * is already relative to the offsets, so we don't need to add that. */
- if (image->bo->size == 0) {
- image->bo->size = size;
- } else if (size > image->bo->size) {
- brw_bo_unreference(image->bo);
- free(image);
- return NULL;
- }
-
- if (f->nplanes == 1) {
- image->offset = image->offsets[0];
- brw_image_warn_if_unaligned(image, __func__);
- }
-
- return image;
-}
-
-static __DRIimage *
-brw_create_image_from_fds(__DRIscreen *dri_screen,
- int width, int height, int fourcc,
- int *fds, int num_fds, int *strides, int *offsets,
- void *loaderPrivate)
-{
- return brw_create_image_from_fds_common(dri_screen, width, height, fourcc,
- DRM_FORMAT_MOD_INVALID,
- fds, num_fds, strides, offsets,
- loaderPrivate);
-}
-
-static __DRIimage *
-brw_create_image_from_dma_bufs2(__DRIscreen *dri_screen,
- int width, int height,
- int fourcc, uint64_t modifier,
- int *fds, int num_fds,
- int *strides, int *offsets,
- enum __DRIYUVColorSpace yuv_color_space,
- enum __DRISampleRange sample_range,
- enum __DRIChromaSiting horizontal_siting,
- enum __DRIChromaSiting vertical_siting,
- unsigned *error,
- void *loaderPrivate)
-{
- __DRIimage *image;
- const struct brw_image_format *f = brw_image_format_lookup(fourcc);
-
- if (!f) {
- *error = __DRI_IMAGE_ERROR_BAD_MATCH;
- return NULL;
- }
-
- image = brw_create_image_from_fds_common(dri_screen, width, height,
- fourcc, modifier,
- fds, num_fds, strides, offsets,
- loaderPrivate);
-
- /*
- * Invalid parameters and any inconsistencies between them are assumed to be
- * checked by the caller. Therefore, besides unsupported formats, failure can
- * occur only in allocation.
- */
- if (!image) {
- *error = __DRI_IMAGE_ERROR_BAD_ALLOC;
- return NULL;
- }
-
- image->yuv_color_space = yuv_color_space;
- image->sample_range = sample_range;
- image->horizontal_siting = horizontal_siting;
- image->vertical_siting = vertical_siting;
- image->imported_dmabuf = true;
-
- *error = __DRI_IMAGE_ERROR_SUCCESS;
- return image;
-}
-
-static __DRIimage *
-brw_create_image_from_dma_bufs(__DRIscreen *dri_screen,
- int width, int height, int fourcc,
- int *fds, int num_fds,
- int *strides, int *offsets,
- enum __DRIYUVColorSpace yuv_color_space,
- enum __DRISampleRange sample_range,
- enum __DRIChromaSiting horizontal_siting,
- enum __DRIChromaSiting vertical_siting,
- unsigned *error,
- void *loaderPrivate)
-{
- return brw_create_image_from_dma_bufs2(dri_screen, width, height,
- fourcc, DRM_FORMAT_MOD_INVALID,
- fds, num_fds, strides, offsets,
- yuv_color_space,
- sample_range,
- horizontal_siting,
- vertical_siting,
- error,
- loaderPrivate);
-}
-
-static bool
-brw_image_format_is_supported(const struct intel_device_info *devinfo,
- const struct brw_image_format *fmt)
-{
- /* Currently, all formats with a brw_image_format are available on all
- * platforms so there's really nothing to check there.
- */
-
-#ifndef NDEBUG
- if (fmt->nplanes == 1) {
- mesa_format format = driImageFormatToGLFormat(fmt->planes[0].dri_format);
- /* The images we will create are actually based on the RGBA non-sRGB
- * version of the format.
- */
- format = _mesa_format_fallback_rgbx_to_rgba(format);
- format = _mesa_get_srgb_format_linear(format);
- enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
- assert(isl_format_supports_rendering(devinfo, isl_format));
- }
-#endif
-
- return true;
-}
-
-static GLboolean
-brw_query_dma_buf_formats(__DRIscreen *_screen, int max,
- int *formats, int *count)
-{
- struct brw_screen *screen = _screen->driverPrivate;
- int num_formats = 0, i;
-
- for (i = 0; i < ARRAY_SIZE(brw_image_formats); i++) {
- /* These formats are valid DRI formats but do not exist in drm_fourcc.h
- * in the Linux kernel. We don't want to accidentally advertise them
- * through the EGL layer.
- */
- if (brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SARGB8888 ||
- brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SABGR8888 ||
- brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SXRGB8888)
- continue;
-
- if (!brw_image_format_is_supported(&screen->devinfo,
- &brw_image_formats[i]))
- continue;
-
- num_formats++;
- if (max == 0)
- continue;
-
- formats[num_formats - 1] = brw_image_formats[i].fourcc;
- if (num_formats >= max)
- break;
- }
-
- *count = num_formats;
- return true;
-}
-
-static GLboolean
-brw_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max,
- uint64_t *modifiers,
- unsigned int *external_only,
- int *count)
-{
- struct brw_screen *screen = _screen->driverPrivate;
- const struct brw_image_format *f;
- int num_mods = 0, i;
-
- f = brw_image_format_lookup(fourcc);
- if (f == NULL)
- return false;
-
- if (!brw_image_format_is_supported(&screen->devinfo, f))
- return false;
-
- for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) {
- uint64_t modifier = supported_modifiers[i].modifier;
- if (!modifier_is_supported(&screen->devinfo, f, 0, 0, modifier))
- continue;
-
- num_mods++;
- if (max == 0)
- continue;
-
- modifiers[num_mods - 1] = modifier;
- if (num_mods >= max)
- break;
- }
-
- if (external_only != NULL) {
- for (i = 0; i < num_mods && i < max; i++) {
- if (f->components == __DRI_IMAGE_COMPONENTS_Y_U_V ||
- f->components == __DRI_IMAGE_COMPONENTS_Y_UV ||
- f->components == __DRI_IMAGE_COMPONENTS_AYUV ||
- f->components == __DRI_IMAGE_COMPONENTS_XYUV ||
- f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV ||
- f->components == __DRI_IMAGE_COMPONENTS_Y_UXVX) {
- external_only[i] = GL_TRUE;
- }
- else {
- external_only[i] = GL_FALSE;
- }
- }
- }
-
- *count = num_mods;
- return true;
-}
-
-static __DRIimage *
-brw_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
-{
- int width, height, offset, stride, size, dri_format;
- __DRIimage *image;
-
- if (parent == NULL)
- return NULL;
-
- width = parent->width;
- height = parent->height;
-
- const struct brw_image_format *f = parent->planar_format;
-
- if (f && plane < f->nplanes) {
- /* Use the planar format definition. */
- width >>= f->planes[plane].width_shift;
- height >>= f->planes[plane].height_shift;
- dri_format = f->planes[plane].dri_format;
- int index = f->planes[plane].buffer_index;
- offset = parent->offsets[index];
- stride = parent->strides[index];
- size = height * stride;
- } else if (plane == 0) {
- /* The only plane of a non-planar image: copy the parent definition
- * directly. */
- dri_format = parent->dri_format;
- offset = parent->offset;
- stride = parent->pitch;
- size = height * stride;
- } else if (plane == 1 && parent->modifier != DRM_FORMAT_MOD_INVALID &&
- isl_drm_modifier_has_aux(parent->modifier)) {
- /* Auxiliary plane */
- dri_format = parent->dri_format;
- offset = parent->aux_offset;
- stride = parent->aux_pitch;
- size = parent->aux_size;
- } else {
- return NULL;
- }
-
- if (offset + size > parent->bo->size) {
- _mesa_warning(NULL, "intel_from_planar: subimage out of bounds");
- return NULL;
- }
-
- image = brw_allocate_image(parent->screen, dri_format, loaderPrivate);
- if (image == NULL)
- return NULL;
-
- image->bo = parent->bo;
- brw_bo_reference(parent->bo);
- image->modifier = parent->modifier;
-
- image->width = width;
- image->height = height;
- image->pitch = stride;
- image->offset = offset;
-
- brw_image_warn_if_unaligned(image, __func__);
-
- return image;
-}
-
-static const __DRIimageExtension brwImageExtension = {
- .base = { __DRI_IMAGE, 19 },
-
- .createImageFromName = brw_create_image_from_name,
- .createImageFromRenderbuffer = brw_create_image_from_renderbuffer,
- .destroyImage = brw_destroy_image,
- .createImage = brw_create_image,
- .queryImage = brw_query_image,
- .dupImage = brw_dup_image,
- .validateUsage = brw_validate_usage,
- .createImageFromNames = brw_create_image_from_names,
- .fromPlanar = brw_from_planar,
- .createImageFromTexture = brw_create_image_from_texture,
- .createImageFromFds = brw_create_image_from_fds,
- .createImageFromDmaBufs = brw_create_image_from_dma_bufs,
- .blitImage = NULL,
- .getCapabilities = NULL,
- .mapImage = brw_map_image,
- .unmapImage = brw_unmap_image,
- .createImageWithModifiers = brw_create_image_with_modifiers,
- .createImageFromDmaBufs2 = brw_create_image_from_dma_bufs2,
- .queryDmaBufFormats = brw_query_dma_buf_formats,
- .queryDmaBufModifiers = brw_query_dma_buf_modifiers,
- .queryDmaBufFormatModifierAttribs = brw_query_format_modifier_attribs,
- .createImageWithModifiers2 = brw_create_image_with_modifiers2,
-};
-
-static int
-brw_query_renderer_integer(__DRIscreen *dri_screen,
- int param, unsigned int *value)
-{
- const struct brw_screen *const screen =
- (struct brw_screen *) dri_screen->driverPrivate;
-
- switch (param) {
- case __DRI2_RENDERER_VENDOR_ID:
- value[0] = 0x8086;
- return 0;
- case __DRI2_RENDERER_DEVICE_ID:
- value[0] = screen->deviceID;
- return 0;
- case __DRI2_RENDERER_ACCELERATED:
- value[0] = 1;
- return 0;
- case __DRI2_RENDERER_VIDEO_MEMORY: {
- /* Once a batch uses more than 75% of the maximum mappable size, we
- * assume that there's some fragmentation, and we start doing extra
- * flushing, etc. That's the big cliff apps will care about.
- */
- const unsigned gpu_mappable_megabytes =
- screen->aperture_threshold / (1024 * 1024);
-
- const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
- const long system_page_size = sysconf(_SC_PAGE_SIZE);
-
- if (system_memory_pages <= 0 || system_page_size <= 0)
- return -1;
-
- const uint64_t system_memory_bytes = (uint64_t) system_memory_pages
- * (uint64_t) system_page_size;
-
- const unsigned system_memory_megabytes =
- (unsigned) (system_memory_bytes / (1024 * 1024));
-
- value[0] = MIN2(system_memory_megabytes, gpu_mappable_megabytes);
- return 0;
- }
- case __DRI2_RENDERER_UNIFIED_MEMORY_ARCHITECTURE:
- value[0] = 1;
- return 0;
- case __DRI2_RENDERER_HAS_TEXTURE_3D:
- value[0] = 1;
- return 0;
- case __DRI2_RENDERER_HAS_CONTEXT_PRIORITY:
- value[0] = 0;
- if (brw_hw_context_set_priority(screen->bufmgr,
- 0, INTEL_CONTEXT_HIGH_PRIORITY) == 0)
- value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH;
- if (brw_hw_context_set_priority(screen->bufmgr,
- 0, INTEL_CONTEXT_LOW_PRIORITY) == 0)
- value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_LOW;
- /* reset to default last, just in case */
- if (brw_hw_context_set_priority(screen->bufmgr,
- 0, INTEL_CONTEXT_MEDIUM_PRIORITY) == 0)
- value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM;
- return 0;
- case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
- value[0] = 1;
- return 0;
- default:
- return driQueryRendererIntegerCommon(dri_screen, param, value);
- }
-
- return -1;
-}
-
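(Editor's note: a worked example, not from the removed file, for the __DRI2_RENDERER_VIDEO_MEMORY arithmetic above; the sizes are assumed, and aperture_threshold is taken to be roughly 75% of the mappable aperture as the in-code comment suggests.)

/* Editorial example: with 16 GiB of system RAM and an aperture_threshold of
 * 1.5 GiB (75% of a 2 GiB mappable aperture), the query reports
 *   MIN2(16384 MB, 1536 MB) = 1536 MB
 * i.e. the value is capped by whichever of the two limits is smaller. */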
-static int
-brw_query_renderer_string(__DRIscreen *dri_screen,
- int param, const char **value)
-{
- const struct brw_screen *screen =
- (struct brw_screen *) dri_screen->driverPrivate;
-
- switch (param) {
- case __DRI2_RENDERER_VENDOR_ID:
- value[0] = brw_vendor_string;
- return 0;
- case __DRI2_RENDERER_DEVICE_ID:
- value[0] = brw_get_renderer_string(screen);
- return 0;
- default:
- break;
- }
-
- return -1;
-}
-
-static void
-brw_set_cache_funcs(__DRIscreen *dri_screen,
- __DRIblobCacheSet set, __DRIblobCacheGet get)
-{
- const struct brw_screen *const screen =
- (struct brw_screen *) dri_screen->driverPrivate;
-
- if (!screen->disk_cache)
- return;
-
- disk_cache_set_callbacks(screen->disk_cache, set, get);
-}
-
-static const __DRI2rendererQueryExtension brwRendererQueryExtension = {
- .base = { __DRI2_RENDERER_QUERY, 1 },
-
- .queryInteger = brw_query_renderer_integer,
- .queryString = brw_query_renderer_string
-};
-
-static const __DRIrobustnessExtension dri2Robustness = {
- .base = { __DRI2_ROBUSTNESS, 1 }
-};
-
-static const __DRI2blobExtension brwBlobExtension = {
- .base = { __DRI2_BLOB, 1 },
- .set_cache_funcs = brw_set_cache_funcs
-};
-
-static const __DRImutableRenderBufferDriverExtension brwMutableRenderBufferExtension = {
- .base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 },
-};
-
-static const __DRIextension *screenExtensions[] = {
- &brwTexBufferExtension.base,
- &brwFenceExtension.base,
- &brwFlushExtension.base,
- &brwImageExtension.base,
- &brwRendererQueryExtension.base,
- &brwMutableRenderBufferExtension.base,
- &dri2ConfigQueryExtension.base,
- &dri2NoErrorExtension.base,
- &brwBlobExtension.base,
- NULL
-};
-
-static const __DRIextension *brwRobustScreenExtensions[] = {
- &brwTexBufferExtension.base,
- &brwFenceExtension.base,
- &brwFlushExtension.base,
- &brwImageExtension.base,
- &brwRendererQueryExtension.base,
- &brwMutableRenderBufferExtension.base,
- &dri2ConfigQueryExtension.base,
- &dri2Robustness.base,
- &dri2NoErrorExtension.base,
- &brwBlobExtension.base,
- NULL
-};
-
-static int
-brw_get_param(struct brw_screen *screen, int param, int *value)
-{
- int ret = 0;
- struct drm_i915_getparam gp;
-
- memset(&gp, 0, sizeof(gp));
- gp.param = param;
- gp.value = value;
-
- if (drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1) {
- ret = -errno;
- if (ret != -EINVAL)
- _mesa_warning(NULL, "drm_i915_getparam: %d", ret);
- }
-
- return ret;
-}
-
-static bool
-brw_get_boolean(struct brw_screen *screen, int param)
-{
- int value = 0;
- return (brw_get_param(screen, param, &value) == 0) && value;
-}
-
-static int
-brw_get_integer(struct brw_screen *screen, int param)
-{
- int value = -1;
-
- if (brw_get_param(screen, param, &value) == 0)
- return value;
-
- return -1;
-}
-
-static void
-brw_destroy_screen(__DRIscreen *sPriv)
-{
- struct brw_screen *screen = sPriv->driverPrivate;
-
- brw_bufmgr_unref(screen->bufmgr);
- driDestroyOptionInfo(&screen->optionCache);
-
- disk_cache_destroy(screen->disk_cache);
-
- ralloc_free(screen);
- sPriv->driverPrivate = NULL;
-}
-
-
-/**
- * Create a gl_framebuffer and attach it to __DRIdrawable::driverPrivate.
- *
- * This implements driDriverAPI::createNewDrawable, which the DRI layer calls
- * when creating an EGLSurface, GLXDrawable, or GLXPixmap. Despite the name,
- * this does not allocate GPU memory.
- */
-static GLboolean
-brw_create_buffer(__DRIscreen *dri_screen,
- __DRIdrawable *driDrawPriv,
- const struct gl_config *mesaVis, GLboolean isPixmap)
-{
- struct brw_renderbuffer *rb;
- struct brw_screen *screen = (struct brw_screen *)
- dri_screen->driverPrivate;
- mesa_format rgbFormat;
- unsigned num_samples =
- brw_quantize_num_samples(screen, mesaVis->samples);
-
- if (isPixmap)
- return false;
-
- struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer);
- if (!fb)
- return false;
-
- _mesa_initialize_window_framebuffer(fb, mesaVis);
-
- if (screen->winsys_msaa_samples_override != -1) {
- num_samples = screen->winsys_msaa_samples_override;
- fb->Visual.samples = num_samples;
- }
-
- if (mesaVis->redBits == 16 && mesaVis->alphaBits > 0 && mesaVis->floatMode) {
- rgbFormat = MESA_FORMAT_RGBA_FLOAT16;
- } else if (mesaVis->redBits == 16 && mesaVis->floatMode) {
- rgbFormat = MESA_FORMAT_RGBX_FLOAT16;
- } else if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) {
- rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10A2_UNORM
- : MESA_FORMAT_R10G10B10A2_UNORM;
- } else if (mesaVis->redBits == 10) {
- rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10X2_UNORM
- : MESA_FORMAT_R10G10B10X2_UNORM;
- } else if (mesaVis->redBits == 5) {
- rgbFormat = mesaVis->redMask == 0x1f ? MESA_FORMAT_R5G6B5_UNORM
- : MESA_FORMAT_B5G6R5_UNORM;
- } else if (mesaVis->alphaBits == 0) {
- rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8X8_SRGB
- : MESA_FORMAT_B8G8R8X8_SRGB;
- fb->Visual.sRGBCapable = true;
- } else if (mesaVis->sRGBCapable) {
- rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB
- : MESA_FORMAT_B8G8R8A8_SRGB;
- fb->Visual.sRGBCapable = true;
- } else {
- rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB
- : MESA_FORMAT_B8G8R8A8_SRGB;
- fb->Visual.sRGBCapable = true;
- }
-
- /* mesaVis->sRGBCapable was set, user is asking for sRGB */
- bool srgb_cap_set = mesaVis->redBits >= 8 && mesaVis->sRGBCapable;
-
- /* setup the hardware-based renderbuffers */
- rb = brw_create_winsys_renderbuffer(screen, rgbFormat, num_samples);
- _mesa_attach_and_own_rb(fb, BUFFER_FRONT_LEFT, &rb->Base.Base);
- rb->need_srgb = srgb_cap_set;
-
- if (mesaVis->doubleBufferMode) {
- rb = brw_create_winsys_renderbuffer(screen, rgbFormat, num_samples);
- _mesa_attach_and_own_rb(fb, BUFFER_BACK_LEFT, &rb->Base.Base);
- rb->need_srgb = srgb_cap_set;
- }
-
- /*
- * Assert here that the gl_config has an expected depth/stencil bit
- * combination: one of d24/s8, d16/s0, d0/s0. (See brw_init_screen(),
- * which constructs the advertised configs.)
- */
- if (mesaVis->depthBits == 24) {
- assert(mesaVis->stencilBits == 8);
-
- if (screen->devinfo.has_hiz_and_separate_stencil) {
- rb = brw_create_private_renderbuffer(screen,
- MESA_FORMAT_Z24_UNORM_X8_UINT,
- num_samples);
- _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base);
- rb = brw_create_private_renderbuffer(screen, MESA_FORMAT_S_UINT8,
- num_samples);
- _mesa_attach_and_own_rb(fb, BUFFER_STENCIL, &rb->Base.Base);
- } else {
- /*
- * Use combined depth/stencil. Note that the renderbuffer is
- * attached to two attachment points.
- */
- rb = brw_create_private_renderbuffer(screen,
- MESA_FORMAT_Z24_UNORM_S8_UINT,
- num_samples);
- _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base);
- _mesa_attach_and_reference_rb(fb, BUFFER_STENCIL, &rb->Base.Base);
- }
- }
- else if (mesaVis->depthBits == 16) {
- assert(mesaVis->stencilBits == 0);
- rb = brw_create_private_renderbuffer(screen, MESA_FORMAT_Z_UNORM16,
- num_samples);
- _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base);
- }
- else {
- assert(mesaVis->depthBits == 0);
- assert(mesaVis->stencilBits == 0);
- }
-
- /* now add any/all software-based renderbuffers we may need */
- _swrast_add_soft_renderbuffers(fb,
- false, /* never sw color */
- false, /* never sw depth */
- false, /* never sw stencil */
- mesaVis->accumRedBits > 0,
- false /* never sw alpha */);
- driDrawPriv->driverPrivate = fb;
-
- return true;
-}
-
-static void
-brw_destroy_buffer(__DRIdrawable *driDrawPriv)
-{
- struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
-
- _mesa_reference_framebuffer(&fb, NULL);
-}
-
-static bool
-brw_init_bufmgr(struct brw_screen *screen)
-{
- __DRIscreen *dri_screen = screen->driScrnPriv;
-
- bool bo_reuse = false;
- int bo_reuse_mode = driQueryOptioni(&screen->optionCache, "bo_reuse");
- switch (bo_reuse_mode) {
- case DRI_CONF_BO_REUSE_DISABLED:
- break;
- case DRI_CONF_BO_REUSE_ALL:
- bo_reuse = true;
- break;
- }
-
- screen->bufmgr = brw_bufmgr_get_for_fd(&screen->devinfo, dri_screen->fd, bo_reuse);
- if (screen->bufmgr == NULL) {
- fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
- __func__, __LINE__);
- return false;
- }
- screen->fd = brw_bufmgr_get_fd(screen->bufmgr);
-
- if (!brw_get_boolean(screen, I915_PARAM_HAS_EXEC_NO_RELOC)) {
- fprintf(stderr, "[%s: %u] Kernel 3.9 required.\n", __func__, __LINE__);
- return false;
- }
-
- return true;
-}
-
-static int
-brw_detect_timestamp(struct brw_screen *screen)
-{
- uint64_t dummy = 0, last = 0;
- int upper, lower, loops;
-
- /* On 64bit systems, some old kernels trigger a hw bug resulting in the
- * TIMESTAMP register being shifted and the low 32bits always zero.
- *
- * More recent kernels offer an interface to read the full 36bits
- * everywhere.
- */
- if (brw_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0)
- return 3;
-
- /* Determine if we have a 32bit or 64bit kernel by inspecting the
- * upper 32bits for a rapidly changing timestamp.
- */
- if (brw_reg_read(screen->bufmgr, TIMESTAMP, &last))
- return 0;
-
- upper = lower = 0;
- for (loops = 0; loops < 10; loops++) {
- /* The TIMESTAMP should change every 80ns, so several round trips
- * through the kernel should be enough to advance it.
- */
- if (brw_reg_read(screen->bufmgr, TIMESTAMP, &dummy))
- return 0;
-
- upper += (dummy >> 32) != (last >> 32);
- if (upper > 1) /* beware 32bit counter overflow */
- return 2; /* upper dword holds the low 32bits of the timestamp */
-
- lower += (dummy & 0xffffffff) != (last & 0xffffffff);
- if (lower > 1)
- return 1; /* timestamp is unshifted */
-
- last = dummy;
- }
-
- /* No advancement? No timestamp! */
- return 0;
-}
-
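-/* Illustrative sketch (not from the original source): one way a caller could
- * consume the mode returned by brw_detect_timestamp() when sampling the
- * TIMESTAMP register.  The helper name and the 36-bit masking are assumptions
- * for illustration only.
- */
-static uint64_t
-read_timestamp_sketch(struct brw_screen *screen)
-{
-   uint64_t raw = 0;
-
-   switch (screen->hw_has_timestamp) {
-   case 3: /* New kernel: read the full 36-bit value directly. */
-      brw_reg_read(screen->bufmgr, TIMESTAMP | 1, &raw);
-      return raw & ((1ull << 36) - 1);
-   case 2: /* Old 64-bit kernel: the counter lives in the upper dword. */
-      brw_reg_read(screen->bufmgr, TIMESTAMP, &raw);
-      return raw >> 32;
-   case 1: /* 32-bit kernel: the counter is unshifted in the lower dword. */
-      brw_reg_read(screen->bufmgr, TIMESTAMP, &raw);
-      return raw & 0xffffffff;
-   default: /* No usable timestamp register was detected. */
-      return 0;
-   }
-}
-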
- /**
- * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
- *
- * Some combinations of hardware and kernel versions allow this feature,
- * while others don't. Instead of trying to enumerate every case, just
- * try to write a register and see if it works.
- */
-static bool
-brw_detect_pipelined_register(struct brw_screen *screen,
- int reg, uint32_t expected_value, bool reset)
-{
- if (screen->devinfo.no_hw)
- return false;
-
- struct brw_bo *results, *bo;
- uint32_t *batch;
- uint32_t offset = 0;
- void *map;
- bool success = false;
-
- /* Create a zero'ed temporary buffer for reading our results */
- results = brw_bo_alloc(screen->bufmgr, "registers", 4096, BRW_MEMZONE_OTHER);
- if (results == NULL)
- goto err;
-
- bo = brw_bo_alloc(screen->bufmgr, "batchbuffer", 4096, BRW_MEMZONE_OTHER);
- if (bo == NULL)
- goto err_results;
-
- map = brw_bo_map(NULL, bo, MAP_WRITE);
- if (!map)
- goto err_batch;
-
- batch = map;
-
- /* Write the register. */
- *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
- *batch++ = reg;
- *batch++ = expected_value;
-
- /* Save the register's value back to the buffer. */
- *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
- *batch++ = reg;
- struct drm_i915_gem_relocation_entry reloc = {
- .offset = (char *) batch - (char *) map,
- .delta = offset * sizeof(uint32_t),
- .target_handle = results->gem_handle,
- .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
- .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
- };
- *batch++ = reloc.presumed_offset + reloc.delta;
-
- /* And afterwards clear the register */
- if (reset) {
- *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
- *batch++ = reg;
- *batch++ = 0;
- }
-
- *batch++ = MI_BATCH_BUFFER_END;
-
- struct drm_i915_gem_exec_object2 exec_objects[2] = {
- {
- .handle = results->gem_handle,
- },
- {
- .handle = bo->gem_handle,
- .relocation_count = 1,
- .relocs_ptr = (uintptr_t) &reloc,
- }
- };
-
- struct drm_i915_gem_execbuffer2 execbuf = {
- .buffers_ptr = (uintptr_t) exec_objects,
- .buffer_count = 2,
- .batch_len = ALIGN((char *) batch - (char *) map, 8),
- .flags = I915_EXEC_RENDER,
- };
-
- /* Don't bother with error checking - if the execbuf fails, the
- * value won't be written and we'll just report that there's no access.
- */
- drmIoctl(screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
-
- /* Check whether the value got written. */
- void *results_map = brw_bo_map(NULL, results, MAP_READ);
- if (results_map) {
- success = *((uint32_t *)results_map + offset) == expected_value;
- brw_bo_unmap(results);
- }
-
-err_batch:
- brw_bo_unreference(bo);
-err_results:
- brw_bo_unreference(results);
-err:
- return success;
-}
-
-static bool
-brw_detect_pipelined_so(struct brw_screen *screen)
-{
- const struct intel_device_info *devinfo = &screen->devinfo;
-
- /* Supposedly, Broadwell just works. */
- if (devinfo->ver >= 8)
- return true;
-
- if (devinfo->ver <= 6)
- return false;
-
- /* See the big explanation about command parser versions below */
- if (screen->cmd_parser_version >= (devinfo->verx10 == 75 ? 7 : 2))
- return true;
-
- /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the
- * statistics registers), and we already reset it to zero before using it.
- */
- return brw_detect_pipelined_register(screen,
- GFX7_SO_WRITE_OFFSET(0),
- 0x1337d0d0,
- false);
-}
-
-/**
- * Return the array of MSAA modes supported by the hardware. The array is
- * sorted in decreasing order and terminated by -1; 0 (non-MSAA) is a valid
- * entry.
- */
-const int*
-brw_supported_msaa_modes(const struct brw_screen *screen)
-{
- static const int gfx9_modes[] = {16, 8, 4, 2, 0, -1};
- static const int gfx8_modes[] = {8, 4, 2, 0, -1};
- static const int gfx7_modes[] = {8, 4, 0, -1};
- static const int gfx6_modes[] = {4, 0, -1};
- static const int gfx4_modes[] = {0, -1};
-
- if (screen->devinfo.ver >= 9) {
- return gfx9_modes;
- } else if (screen->devinfo.ver >= 8) {
- return gfx8_modes;
- } else if (screen->devinfo.ver >= 7) {
- return gfx7_modes;
- } else if (screen->devinfo.ver == 6) {
- return gfx6_modes;
- } else {
- return gfx4_modes;
- }
-}
-
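-/* Illustrative sketch (hypothetical helper, not part of the original file):
- * round a requested sample count to a supported MSAA mode by walking the
- * decreasing, -1-terminated array returned by brw_supported_msaa_modes().
- */
-static int
-quantize_msaa_sketch(const struct brw_screen *screen, unsigned requested)
-{
-   const int *modes = brw_supported_msaa_modes(screen);
-   int chosen = 0;
-
-   /* Keep tightening the bound while the mode still covers the request;
-    * a request larger than the maximum falls through and yields 0.
-    */
-   for (int i = 0; modes[i] != -1; i++) {
-      if ((unsigned) modes[i] >= requested)
-         chosen = modes[i];
-      else
-         break;
-   }
-
-   return chosen;
-}
-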
-static unsigned
-brw_loader_get_cap(const __DRIscreen *dri_screen, enum dri_loader_cap cap)
-{
- if (dri_screen->dri2.loader && dri_screen->dri2.loader->base.version >= 4 &&
- dri_screen->dri2.loader->getCapability)
- return dri_screen->dri2.loader->getCapability(dri_screen->loaderPrivate, cap);
-
- if (dri_screen->image.loader && dri_screen->image.loader->base.version >= 2 &&
- dri_screen->image.loader->getCapability)
- return dri_screen->image.loader->getCapability(dri_screen->loaderPrivate, cap);
-
- return 0;
-}
-
-static bool
-brw_allowed_format(__DRIscreen *dri_screen, mesa_format format)
-{
- struct brw_screen *screen = dri_screen->driverPrivate;
-
- /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */
- bool allow_rgba_ordering = brw_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING);
- if (!allow_rgba_ordering &&
- (format == MESA_FORMAT_R8G8B8A8_UNORM ||
- format == MESA_FORMAT_R8G8B8X8_UNORM ||
- format == MESA_FORMAT_R8G8B8A8_SRGB ||
- format == MESA_FORMAT_R8G8B8X8_SRGB))
- return false;
-
- /* Shall we expose 10 bpc formats? */
- bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache,
- "allow_rgb10_configs");
- if (!allow_rgb10_configs &&
- (format == MESA_FORMAT_B10G10R10A2_UNORM ||
- format == MESA_FORMAT_B10G10R10X2_UNORM))
- return false;
-
- /* Shall we expose 565 formats? */
- bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache,
- "allow_rgb565_configs");
- if (!allow_rgb565_configs && format == MESA_FORMAT_B5G6R5_UNORM)
- return false;
-
- /* Shall we expose fp16 formats? */
- bool allow_fp16_configs = brw_loader_get_cap(dri_screen, DRI_LOADER_CAP_FP16);
- if (!allow_fp16_configs &&
- (format == MESA_FORMAT_RGBA_FLOAT16 ||
- format == MESA_FORMAT_RGBX_FLOAT16))
- return false;
-
- return true;
-}
-
-static __DRIconfig**
-brw_screen_make_configs(__DRIscreen *dri_screen)
-{
- static const mesa_format formats[] = {
- MESA_FORMAT_B5G6R5_UNORM,
- MESA_FORMAT_B8G8R8A8_UNORM,
- MESA_FORMAT_B8G8R8X8_UNORM,
-
- MESA_FORMAT_B8G8R8A8_SRGB,
- MESA_FORMAT_B8G8R8X8_SRGB,
-
- /* For 10 bpc, 30 bit depth framebuffers. */
- MESA_FORMAT_B10G10R10A2_UNORM,
- MESA_FORMAT_B10G10R10X2_UNORM,
-
- MESA_FORMAT_RGBA_FLOAT16,
- MESA_FORMAT_RGBX_FLOAT16,
-
- /* The 32-bit RGBA format must not precede the 32-bit BGRA format.
- * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX
- * server may disagree on which format the GLXFBConfig represents,
- * resulting in swapped color channels.
- *
- * The problem, as of 2017-05-30:
- * When matching a GLXFBConfig to a __DRIconfig, GLX ignores the channel
- * order and chooses the first __DRIconfig with the expected channel
- * sizes. Specifically, GLX compares the GLXFBConfig's and __DRIconfig's
- * __DRI_ATTRIB_{CHANNEL}_SIZE but ignores __DRI_ATTRIB_{CHANNEL}_MASK.
- *
- * EGL does not suffer from this problem. It correctly compares the
- * channel masks when matching EGLConfig to __DRIconfig.
- */
-
- /* Required by Android, for HAL_PIXEL_FORMAT_RGBA_8888. */
- MESA_FORMAT_R8G8B8A8_UNORM,
- MESA_FORMAT_R8G8B8A8_SRGB,
-
- /* Required by Android, for HAL_PIXEL_FORMAT_RGBX_8888. */
- MESA_FORMAT_R8G8B8X8_UNORM,
- MESA_FORMAT_R8G8B8X8_SRGB,
- };
-
- /* __DRI_ATTRIB_SWAP_COPY is not supported due to page flipping. */
- static const GLenum back_buffer_modes[] = {
- __DRI_ATTRIB_SWAP_UNDEFINED, __DRI_ATTRIB_SWAP_NONE
- };
-
- static const uint8_t singlesample_samples[1] = {0};
-
- struct brw_screen *screen = dri_screen->driverPrivate;
- const struct intel_device_info *devinfo = &screen->devinfo;
- uint8_t depth_bits[4], stencil_bits[4];
- __DRIconfig **configs = NULL;
-
- unsigned num_formats = ARRAY_SIZE(formats);
-
- /* Generate singlesample configs, each without accumulation buffer
- * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
- */
- for (unsigned i = 0; i < num_formats; i++) {
- __DRIconfig **new_configs;
- int num_depth_stencil_bits = 1;
-
- if (!brw_allowed_format(dri_screen, formats[i]))
- continue;
-
- /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil
- * buffer that has a different number of bits per pixel than the color
-       * buffer; gen >= 6 supports this.
- */
- depth_bits[0] = 0;
- stencil_bits[0] = 0;
-
- if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) {
- if (devinfo->ver >= 8) {
- depth_bits[num_depth_stencil_bits] = 16;
- stencil_bits[num_depth_stencil_bits] = 0;
- num_depth_stencil_bits++;
- }
- if (devinfo->ver >= 6) {
- depth_bits[num_depth_stencil_bits] = 24;
- stencil_bits[num_depth_stencil_bits] = 8;
- num_depth_stencil_bits++;
- }
- } else {
- depth_bits[num_depth_stencil_bits] = 24;
- stencil_bits[num_depth_stencil_bits] = 8;
- num_depth_stencil_bits++;
- }
-
- new_configs = driCreateConfigs(formats[i],
- depth_bits,
- stencil_bits,
- num_depth_stencil_bits,
- back_buffer_modes, 2,
- singlesample_samples, 1,
- false, false);
- configs = driConcatConfigs(configs, new_configs);
- }
-
- /* Generate the minimum possible set of configs that include an
- * accumulation buffer.
- */
- for (unsigned i = 0; i < num_formats; i++) {
- __DRIconfig **new_configs;
-
- if (!brw_allowed_format(dri_screen, formats[i]))
- continue;
-
- if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) {
- if (devinfo->ver >= 8) {
- depth_bits[0] = 16;
- stencil_bits[0] = 0;
- } else if (devinfo->ver >= 6) {
- depth_bits[0] = 24;
- stencil_bits[0] = 8;
- } else {
- depth_bits[0] = 0;
- stencil_bits[0] = 0;
- }
- } else {
- depth_bits[0] = 24;
- stencil_bits[0] = 8;
- }
-
- new_configs = driCreateConfigs(formats[i],
- depth_bits, stencil_bits, 1,
- back_buffer_modes, 1,
- singlesample_samples, 1,
- true, false);
- configs = driConcatConfigs(configs, new_configs);
- }
-
- /* Generate multisample configs.
- *
- * This loop breaks early, and hence is a no-op, on gen < 6.
- *
- * Multisample configs must follow the singlesample configs in order to
- * work around an X server bug present in 1.12. The X server chooses to
- * associate the first listed RGBA888-Z24S8 config, regardless of its
- * sample count, with the 32-bit depth visual used for compositing.
- *
- * Only doublebuffer configs with GLX_SWAP_UNDEFINED_OML behavior are
- * supported. Singlebuffer configs are not supported because no one wants
- * them.
- */
- for (unsigned i = 0; i < num_formats; i++) {
- if (devinfo->ver < 6)
- break;
-
- if (!brw_allowed_format(dri_screen, formats[i]))
- continue;
-
- __DRIconfig **new_configs;
- const int num_depth_stencil_bits = 2;
- int num_msaa_modes = 0;
- const uint8_t *multisample_samples = NULL;
-
- depth_bits[0] = 0;
- stencil_bits[0] = 0;
-
- if (formats[i] == MESA_FORMAT_B5G6R5_UNORM && devinfo->ver >= 8) {
- depth_bits[1] = 16;
- stencil_bits[1] = 0;
- } else {
- depth_bits[1] = 24;
- stencil_bits[1] = 8;
- }
-
- if (devinfo->ver >= 9) {
- static const uint8_t multisample_samples_gfx9[] = {2, 4, 8, 16};
- multisample_samples = multisample_samples_gfx9;
- num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx9);
- } else if (devinfo->ver == 8) {
- static const uint8_t multisample_samples_gfx8[] = {2, 4, 8};
- multisample_samples = multisample_samples_gfx8;
- num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx8);
- } else if (devinfo->ver == 7) {
- static const uint8_t multisample_samples_gfx7[] = {4, 8};
- multisample_samples = multisample_samples_gfx7;
- num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx7);
- } else if (devinfo->ver == 6) {
- static const uint8_t multisample_samples_gfx6[] = {4};
- multisample_samples = multisample_samples_gfx6;
- num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx6);
- }
-
- new_configs = driCreateConfigs(formats[i],
- depth_bits,
- stencil_bits,
- num_depth_stencil_bits,
- back_buffer_modes, 1,
- multisample_samples,
- num_msaa_modes,
- false, false);
- configs = driConcatConfigs(configs, new_configs);
- }
-
- if (configs == NULL) {
- fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
- __LINE__);
- return NULL;
- }
-
- return configs;
-}
-
-static void
-set_max_gl_versions(struct brw_screen *screen)
-{
- __DRIscreen *dri_screen = screen->driScrnPriv;
- const bool has_astc = screen->devinfo.ver >= 9;
-
- switch (screen->devinfo.ver) {
- case 11:
- case 10:
- case 9:
- case 8:
- dri_screen->max_gl_core_version = 46;
- dri_screen->max_gl_compat_version = 30;
- dri_screen->max_gl_es1_version = 11;
- dri_screen->max_gl_es2_version = has_astc ? 32 : 31;
- break;
- case 7:
- dri_screen->max_gl_core_version = 33;
- if (can_do_pipelined_register_writes(screen)) {
- dri_screen->max_gl_core_version = 42;
- if (screen->devinfo.platform == INTEL_PLATFORM_HSW && can_do_compute_dispatch(screen))
- dri_screen->max_gl_core_version = 43;
- if (screen->devinfo.platform == INTEL_PLATFORM_HSW && can_do_mi_math_and_lrr(screen))
- dri_screen->max_gl_core_version = 45;
- }
- dri_screen->max_gl_compat_version = 30;
- dri_screen->max_gl_es1_version = 11;
- dri_screen->max_gl_es2_version = screen->devinfo.platform == INTEL_PLATFORM_HSW ? 31 : 30;
- break;
- case 6:
- dri_screen->max_gl_core_version = 33;
- dri_screen->max_gl_compat_version = 30;
- dri_screen->max_gl_es1_version = 11;
- dri_screen->max_gl_es2_version = 30;
- break;
- case 5:
- case 4:
- dri_screen->max_gl_core_version = 0;
- dri_screen->max_gl_compat_version = 21;
- dri_screen->max_gl_es1_version = 11;
- dri_screen->max_gl_es2_version = 20;
- break;
- default:
- unreachable("unrecognized brw_screen::gen");
- }
-
- /* OpenGL 3.3+ requires GL_ARB_blend_func_extended. Don't advertise those
- * versions if driconf disables the extension.
- */
- if (driQueryOptionb(&screen->optionCache, "disable_blend_func_extended")) {
- dri_screen->max_gl_core_version =
- MIN2(32, dri_screen->max_gl_core_version);
- dri_screen->max_gl_compat_version =
- MIN2(32, dri_screen->max_gl_compat_version);
- }
-
- /* Using the `allow_higher_compat_version` option during context creation
- * means that an application that doesn't request a specific version can be
- * given a version higher than 3.0. However, an application still cannot
- * request a higher version. For that to work, max_gl_compat_version must
- * be set.
- */
- if (dri_screen->max_gl_compat_version < dri_screen->max_gl_core_version) {
- if (driQueryOptionb(&screen->optionCache, "allow_higher_compat_version"))
- dri_screen->max_gl_compat_version = dri_screen->max_gl_core_version;
- }
-}
-
-static void
-shader_debug_log_mesa(void *data, unsigned *msg_id, const char *fmt, ...)
-{
- struct brw_context *brw = (struct brw_context *)data;
- va_list args;
-
- va_start(args, fmt);
- _mesa_gl_vdebugf(&brw->ctx, msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_OTHER,
- MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
- va_end(args);
-}
-
-static void
-shader_perf_log_mesa(void *data, unsigned *msg_id, const char *fmt, ...)
-{
- struct brw_context *brw = (struct brw_context *)data;
-
- va_list args;
- va_start(args, fmt);
-
- if (INTEL_DEBUG(DEBUG_PERF)) {
- va_list args_copy;
- va_copy(args_copy, args);
- vfprintf(stderr, fmt, args_copy);
- va_end(args_copy);
- }
-
- if (brw->perf_debug) {
- _mesa_gl_vdebugf(&brw->ctx, msg_id,
- MESA_DEBUG_SOURCE_SHADER_COMPILER,
- MESA_DEBUG_TYPE_PERFORMANCE,
- MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
- }
- va_end(args);
-}
-
-/**
- * This is the driver specific part of the createNewScreen entry point.
- * Called when using DRI2.
- *
- * \return the struct gl_config supported by this driver
- */
-static const
-__DRIconfig **brw_init_screen(__DRIscreen *dri_screen)
-{
- struct brw_screen *screen;
-
- util_cpu_detect();
-
- if (dri_screen->image.loader) {
- } else if (dri_screen->dri2.loader->base.version <= 2 ||
- dri_screen->dri2.loader->getBuffersWithFormat == NULL) {
- fprintf(stderr,
- "\nERROR! DRI2 loader with getBuffersWithFormat() "
- "support required\n");
- return NULL;
- }
-
- /* Allocate the private area */
- screen = rzalloc(NULL, struct brw_screen);
- if (!screen) {
- fprintf(stderr, "\nERROR! Allocating private area failed\n");
- return NULL;
- }
- /* parse information in __driConfigOptions */
- driOptionCache options;
- memset(&options, 0, sizeof(options));
-
- driParseOptionInfo(&options, brw_driconf, ARRAY_SIZE(brw_driconf));
- driParseConfigFiles(&screen->optionCache, &options, dri_screen->myNum,
- "i965", NULL, NULL, NULL, 0, NULL, 0);
- driDestroyOptionCache(&options);
-
- screen->driScrnPriv = dri_screen;
- dri_screen->driverPrivate = (void *) screen;
-
- if (!intel_get_device_info_from_fd(dri_screen->fd, &screen->devinfo))
- return NULL;
-
- const struct intel_device_info *devinfo = &screen->devinfo;
- screen->deviceID = devinfo->chipset_id;
-
- if (devinfo->ver >= 12) {
- fprintf(stderr, "gfx12 and newer are not supported on i965\n");
- return NULL;
- }
-
- if (!brw_init_bufmgr(screen))
- return NULL;
-
- brw_process_intel_debug_variable();
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME) && devinfo->ver < 7) {
- fprintf(stderr,
- "shader_time debugging requires gfx7 (Ivybridge) or better.\n");
- intel_debug &= ~DEBUG_SHADER_TIME;
- }
-
- if (brw_get_integer(screen, I915_PARAM_MMAP_GTT_VERSION) >= 1) {
-      /* Theoretically unlimited! At least for individual objects...
-       *
-       * Currently the entire (global) address space for all GTT maps is
-       * limited to 64bits. That is, all objects on the system that are
-       * set up for GTT mmapping must fit within 64bits. An attempt to use
-       * one that exceeds the limit will fail in brw_bo_map_gtt().
-       *
-       * Long before we hit that limit, we will be practically limited by
-       * the fact that any single object must fit in physical memory (RAM). The upper
- * limit on the CPU's address space is currently 48bits (Skylake), of
- * which only 39bits can be physical memory. (The GPU itself also has
- * a 48bit addressable virtual space.) We can fit over 32 million
- * objects of the current maximum allocable size before running out
- * of mmap space.
- */
- screen->max_gtt_map_object_size = UINT64_MAX;
- } else {
- /* Estimate the size of the mappable aperture into the GTT. There's an
- * ioctl to get the whole GTT size, but not one to get the mappable subset.
- * It turns out it's basically always 256MB, though some ancient hardware
- * was smaller.
- */
- uint32_t gtt_size = 256 * 1024 * 1024;
-
- /* We don't want to map two objects such that a memcpy between them would
- * just fault one mapping in and then the other over and over forever. So
- * we would need to divide the GTT size by 2. Additionally, some GTT is
- * taken up by things like the framebuffer and the ringbuffer and such, so
- * be more conservative.
- */
- screen->max_gtt_map_object_size = gtt_size / 4;
- }
-
- screen->aperture_threshold = devinfo->aperture_bytes * 3 / 4;
-
- screen->hw_has_timestamp = brw_detect_timestamp(screen);
-
- isl_device_init(&screen->isl_dev, &screen->devinfo);
-
- /* Gfx7-7.5 kernel requirements / command parser saga:
- *
- * - pre-v3.16:
- * Haswell and Baytrail cannot use any privileged batchbuffer features.
- *
- * Ivybridge has aliasing PPGTT on by default, which accidentally marks
- * all batches secure, allowing them to use any feature with no checking.
- * This is effectively equivalent to a command parser version of
- * \infinity - everything is possible.
- *
- * The command parser does not exist, and querying the version will
- * return -EINVAL.
- *
- * - v3.16:
- * The kernel enables the command parser by default, for systems with
- * aliasing PPGTT enabled (Ivybridge and Haswell). However, the
- * hardware checker is still enabled, so Haswell and Baytrail cannot
- * do anything.
- *
- * Ivybridge goes from "everything is possible" to "only what the
- * command parser allows" (if the user boots with i915.cmd_parser=0,
- * then everything is possible again). We can only safely use features
- * allowed by the supported command parser version.
- *
- * Annoyingly, I915_PARAM_CMD_PARSER_VERSION reports the static version
- * implemented by the kernel, even if it's turned off. So, checking
- * for version > 0 does not mean that you can write registers. We have
- * to try it and see. The version does, however, indicate the age of
- * the kernel.
- *
- * Instead of matching the hardware checker's behavior of converting
- * privileged commands to MI_NOOP, it makes execbuf2 start returning
- * -EINVAL, making it dangerous to try and use privileged features.
- *
- * Effective command parser versions:
- * - Haswell: 0 (reporting 1, writes don't work)
- * - Baytrail: 0 (reporting 1, writes don't work)
- * - Ivybridge: 1 (enabled) or infinite (disabled)
- *
- * - v3.17:
- * Baytrail aliasing PPGTT is enabled, making it like Ivybridge:
- * effectively version 1 (enabled) or infinite (disabled).
- *
- * - v3.19: f1f55cc0556031c8ee3fe99dae7251e78b9b653b
- * Command parser v2 supports predicate writes.
- *
- * - Haswell: 0 (reporting 1, writes don't work)
- * - Baytrail: 2 (enabled) or infinite (disabled)
- * - Ivybridge: 2 (enabled) or infinite (disabled)
- *
- * So version >= 2 is enough to know that Ivybridge and Baytrail
- * will work. Haswell still can't do anything.
- *
- * - v4.0: Version 3 happened. Largely not relevant.
- *
- * - v4.1: 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b
- * L3 config registers are properly saved and restored as part
- * of the hardware context. We can approximately detect this point
- * in time by checking if I915_PARAM_REVISION is recognized - it
- * landed in a later commit, but in the same release cycle.
- *
- * - v4.2: 245054a1fe33c06ad233e0d58a27ec7b64db9284
- * Command parser finally gains secure batch promotion. On Haswell,
- * the hardware checker gets disabled, which finally allows it to do
- * privileged commands.
- *
- * I915_PARAM_CMD_PARSER_VERSION reports 3. Effective versions:
- * - Haswell: 3 (enabled) or 0 (disabled)
- * - Baytrail: 3 (enabled) or infinite (disabled)
- * - Ivybridge: 3 (enabled) or infinite (disabled)
- *
- * Unfortunately, detecting this point in time is tricky, because
- * no version bump happened when this important change occurred.
- * On Haswell, if we can write any register, then the kernel is at
- * least this new, and we can start trusting the version number.
- *
-    * - v4.4: 2bbe6bbb0dc94fd4ce287bdac9e1bd184e23057b
- * Command parser reaches version 4, allowing access to Haswell
- * atomic scratch and chicken3 registers. If version >= 4, we know
- * the kernel is new enough to support privileged features on all
- * hardware. However, the user might have disabled it...and the
- * kernel will still report version 4. So we still have to guess
- * and check.
- *
- * - v4.4: 7b9748cb513a6bef4af87b79f0da3ff7e8b56cd8
- * Command parser v5 whitelists indirect compute shader dispatch
- * registers, needed for OpenGL 4.3 and later.
- *
- * - v4.8:
- * Command parser v7 lets us use MI_MATH on Haswell.
- *
- * Additionally, the kernel begins reporting version 0 when
- * the command parser is disabled, allowing us to skip the
- * guess-and-check step on Haswell. Unfortunately, this also
- * means that we can no longer use it as an indicator of the
- * age of the kernel.
- */
- if (brw_get_param(screen, I915_PARAM_CMD_PARSER_VERSION,
- &screen->cmd_parser_version) < 0) {
- /* Command parser does not exist - getparam is unrecognized */
- screen->cmd_parser_version = 0;
- }
-
-   /* Kernel 4.13 required for exec object capture */
- if (brw_get_boolean(screen, I915_PARAM_HAS_EXEC_CAPTURE)) {
- screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE;
- }
-
- if (brw_get_boolean(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST)) {
- screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST;
- }
-
- if (!brw_detect_pipelined_so(screen)) {
- /* We can't do anything, so the effective version is 0. */
- screen->cmd_parser_version = 0;
- } else {
- screen->kernel_features |= KERNEL_ALLOWS_SOL_OFFSET_WRITES;
- }
-
- if (devinfo->ver >= 8 || screen->cmd_parser_version >= 2)
- screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES;
-
- /* Haswell requires command parser version 4 in order to have L3
- * atomic scratch1 and chicken3 bits
- */
- if (devinfo->verx10 == 75 && screen->cmd_parser_version >= 4) {
- screen->kernel_features |=
- KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
- }
-
- /* Haswell requires command parser version 6 in order to write to the
- * MI_MATH GPR registers, and version 7 in order to use
- * MI_LOAD_REGISTER_REG (which all users of MI_MATH use).
- */
- if (devinfo->ver >= 8 ||
- (devinfo->verx10 == 75 && screen->cmd_parser_version >= 7)) {
- screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR;
- }
-
- /* Gfx7 needs at least command parser version 5 to support compute */
- if (devinfo->ver >= 8 || screen->cmd_parser_version >= 5)
- screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH;
-
- if (brw_get_boolean(screen, I915_PARAM_HAS_CONTEXT_ISOLATION))
- screen->kernel_features |= KERNEL_ALLOWS_CONTEXT_ISOLATION;
-
- const char *force_msaa = getenv("INTEL_FORCE_MSAA");
- if (force_msaa) {
- screen->winsys_msaa_samples_override =
- brw_quantize_num_samples(screen, atoi(force_msaa));
- printf("Forcing winsys sample count to %d\n",
- screen->winsys_msaa_samples_override);
- } else {
- screen->winsys_msaa_samples_override = -1;
- }
-
- set_max_gl_versions(screen);
-
- /* Notification of GPU resets requires hardware contexts and a kernel new
- * enough to support DRM_IOCTL_I915_GET_RESET_STATS. If the ioctl is
- * supported, calling it with a context of 0 will either generate EPERM or
-    * no error. If the ioctl is not supported, it always generates EINVAL.
- * Use this to determine whether to advertise the __DRI2_ROBUSTNESS
- * extension to the loader.
- *
- * Don't even try on pre-Gfx6, since we don't attempt to use contexts there.
- */
- if (devinfo->ver >= 6) {
- struct drm_i915_reset_stats stats;
- memset(&stats, 0, sizeof(stats));
-
- const int ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
-
- screen->has_context_reset_notification =
- (ret != -1 || errno != EINVAL);
- }
-
- dri_screen->extensions = !screen->has_context_reset_notification
- ? screenExtensions : brwRobustScreenExtensions;
-
- screen->compiler = brw_compiler_create(screen, devinfo);
- screen->compiler->shader_debug_log = shader_debug_log_mesa;
- screen->compiler->shader_perf_log = shader_perf_log_mesa;
-
- /* Changing the meaning of constant buffer pointers from a dynamic state
- * offset to an absolute address is only safe if the kernel isolates other
- * contexts from our changes.
- */
- screen->compiler->constant_buffer_0_is_relative = devinfo->ver < 8 ||
- !(screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION);
-
- screen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].PositionAlwaysInvariant = driQueryOptionb(&screen->optionCache, "vs_position_always_invariant");
- screen->compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].PositionAlwaysPrecise = driQueryOptionb(&screen->optionCache, "vs_position_always_precise");
-
- screen->compiler->supports_pull_constants = true;
- screen->compiler->compact_params = true;
- screen->compiler->lower_variable_group_size = true;
-
- screen->has_exec_fence =
- brw_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE);
-
- brw_screen_init_surface_formats(screen);
-
- if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) {
- unsigned int caps = brw_get_integer(screen, I915_PARAM_HAS_SCHEDULER);
- if (caps) {
- fprintf(stderr, "Kernel scheduler detected: %08x\n", caps);
- if (caps & I915_SCHEDULER_CAP_PRIORITY)
- fprintf(stderr, " - User priority sorting enabled\n");
- if (caps & I915_SCHEDULER_CAP_PREEMPTION)
- fprintf(stderr, " - Preemption enabled\n");
- }
- }
-
- brw_disk_cache_init(screen);
-
- return (const __DRIconfig**) brw_screen_make_configs(dri_screen);
-}
-
-struct brw_buffer {
- __DRIbuffer base;
- struct brw_bo *bo;
-};
-
-static __DRIbuffer *
-brw_allocate_buffer(__DRIscreen *dri_screen,
- unsigned attachment, unsigned format,
- int width, int height)
-{
- struct brw_screen *screen = dri_screen->driverPrivate;
-
- assert(attachment == __DRI_BUFFER_FRONT_LEFT ||
- attachment == __DRI_BUFFER_BACK_LEFT);
-
- struct brw_buffer *buffer = calloc(1, sizeof *buffer);
- if (buffer == NULL)
- return NULL;
-
- /* The front and back buffers are color buffers, which are X tiled. GFX9+
- * supports Y tiled and compressed buffers, but there is no way to plumb that
- * through to here. */
- uint32_t pitch;
- int cpp = format / 8;
- buffer->bo = brw_bo_alloc_tiled_2d(screen->bufmgr,
- __func__,
- width,
- height,
- cpp,
- BRW_MEMZONE_OTHER,
- I915_TILING_X, &pitch,
- BO_ALLOC_BUSY);
-
- if (buffer->bo == NULL) {
- free(buffer);
- return NULL;
- }
-
- brw_bo_flink(buffer->bo, &buffer->base.name);
-
- buffer->base.attachment = attachment;
- buffer->base.cpp = cpp;
- buffer->base.pitch = pitch;
-
- return &buffer->base;
-}
-
-static void
-brw_release_buffer(UNUSED __DRIscreen *dri_screen, __DRIbuffer *_buffer)
-{
- struct brw_buffer *buffer = (struct brw_buffer *) _buffer;
-
- brw_bo_unreference(buffer->bo);
- free(buffer);
-}
-
-static const struct __DriverAPIRec brw_driver_api = {
- .InitScreen = brw_init_screen,
- .DestroyScreen = brw_destroy_screen,
- .CreateContext = brw_create_context,
- .DestroyContext = brw_destroy_context,
- .CreateBuffer = brw_create_buffer,
- .DestroyBuffer = brw_destroy_buffer,
- .MakeCurrent = brw_make_current,
- .UnbindContext = brw_unbind_context,
- .AllocateBuffer = brw_allocate_buffer,
- .ReleaseBuffer = brw_release_buffer
-};
-
-static const struct __DRIDriverVtableExtensionRec brw_vtable = {
- .base = { __DRI_DRIVER_VTABLE, 1 },
- .vtable = &brw_driver_api,
-};
-
-static const __DRIextension *brw_driver_extensions[] = {
- &driCoreExtension.base,
- &driImageDriverExtension.base,
- &driDRI2Extension.base,
- &brw_vtable.base,
- &brw_config_options.base,
- NULL
-};
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
- globalDriverAPI = &brw_driver_api;
-
- return brw_driver_extensions;
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _INTEL_INIT_H_
-#define _INTEL_INIT_H_
-
-#include <stdbool.h>
-#include <sys/time.h>
-
-#include <GL/internal/dri_interface.h>
-
-#include "isl/isl.h"
-#include "dri_util.h"
-#include "brw_bufmgr.h"
-#include "dev/intel_device_info.h"
-#include "drm-uapi/i915_drm.h"
-#include "util/xmlconfig.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_screen
-{
- int deviceID;
- struct intel_device_info devinfo;
-
- __DRIscreen *driScrnPriv;
-
- uint64_t max_gtt_map_object_size;
-
- /** Bytes of aperture usage beyond which execbuf is likely to fail. */
- uint64_t aperture_threshold;
-
- /** DRM fd associated with this screen. Not owned by this object. Do not close. */
- int fd;
-
- bool has_exec_fence; /**< I915_PARAM_HAS_EXEC_FENCE */
-
- int hw_has_timestamp;
-
- struct isl_device isl_dev;
-
- /**
- * Does the kernel support context reset notifications?
- */
- bool has_context_reset_notification;
-
- /**
- * Does the kernel support features such as pipelined register access to
- * specific registers?
- */
- unsigned kernel_features;
-#define KERNEL_ALLOWS_SOL_OFFSET_WRITES (1<<0)
-#define KERNEL_ALLOWS_PREDICATE_WRITES (1<<1)
-#define KERNEL_ALLOWS_MI_MATH_AND_LRR (1<<2)
-#define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3)
-#define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4)
-#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5)
-#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6)
-#define KERNEL_ALLOWS_CONTEXT_ISOLATION (1<<7)
-
- struct brw_bufmgr *bufmgr;
-
- /**
- * A unique ID for shader programs.
- */
- unsigned program_id;
-
- int winsys_msaa_samples_override;
-
- struct brw_compiler *compiler;
-
- /**
- * Configuration cache with default values for all contexts
- */
- driOptionCache optionCache;
-
- /**
- * Version of the command parser reported by the
- * I915_PARAM_CMD_PARSER_VERSION parameter
- */
- int cmd_parser_version;
-
- bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
- bool mesa_format_supports_render[MESA_FORMAT_COUNT];
- enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];
-
- struct disk_cache *disk_cache;
-};
-
-extern void brw_destroy_context(__DRIcontext *driContextPriv);
-
-extern GLboolean brw_unbind_context(__DRIcontext *driContextPriv);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void);
-extern const __DRI2fenceExtension brwFenceExtension;
-
-extern GLboolean
-brw_make_current(__DRIcontext *driContextPriv,
- __DRIdrawable *driDrawPriv,
- __DRIdrawable *driReadPriv);
-
-double get_time(void);
-
-const int*
-brw_supported_msaa_modes(const struct brw_screen *screen);
-
-static inline bool
-can_do_pipelined_register_writes(const struct brw_screen *screen)
-{
- return screen->kernel_features & KERNEL_ALLOWS_SOL_OFFSET_WRITES;
-}
-
-static inline bool
-can_do_hsw_l3_atomics(const struct brw_screen *screen)
-{
- return screen->kernel_features & KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
-}
-
-static inline bool
-can_do_mi_math_and_lrr(const struct brw_screen *screen)
-{
- return screen->kernel_features & KERNEL_ALLOWS_MI_MATH_AND_LRR;
-}
-
-static inline bool
-can_do_compute_dispatch(const struct brw_screen *screen)
-{
- return screen->kernel_features & KERNEL_ALLOWS_COMPUTE_DISPATCH;
-}
-
-static inline bool
-can_do_predicate_writes(const struct brw_screen *screen)
-{
- return screen->kernel_features & KERNEL_ALLOWS_PREDICATE_WRITES;
-}
-
-static inline bool
-can_do_exec_capture(const struct brw_screen *screen)
-{
- return screen->kernel_features & KERNEL_ALLOWS_EXEC_CAPTURE;
-}
-
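-/* Illustrative sketch (hypothetical call sites, not from the original
- * header): callers are expected to gate privileged command use on the
- * detected kernel features, e.g.
- *
- *    if (can_do_mi_math_and_lrr(brw->screen))
- *       use_mi_math_path();            (hypothetical fast path)
- *    else
- *       use_fallback_path();           (hypothetical fallback)
- */
-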
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-#include "compiler/nir/nir.h"
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/state.h"
-
-#include "brw_batch.h"
-
-#include "brw_defines.h"
-#include "brw_context.h"
-#include "brw_util.h"
-#include "brw_state.h"
-#include "compiler/brw_eu.h"
-
-#include "util/ralloc.h"
-
-static void
-compile_sf_prog(struct brw_context *brw, struct brw_sf_prog_key *key)
-{
- const unsigned *program;
- void *mem_ctx;
- unsigned program_size;
-
- mem_ctx = ralloc_context(NULL);
-
- struct brw_sf_prog_data prog_data;
- program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data,
- &brw->vue_map_geom_out, &program_size);
-
- brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG,
- key, sizeof(*key),
- program, program_size,
- &prog_data, sizeof(prog_data),
- &brw->sf.prog_offset, &brw->sf.prog_data);
- ralloc_free(mem_ctx);
-}
-
-/* Calculate interpolants for triangle and line rasterization.
- */
-void
-brw_upload_sf_prog(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_sf_prog_key key;
-
- if (!brw_state_dirty(brw,
- _NEW_BUFFERS |
- _NEW_HINT |
- _NEW_LIGHT |
- _NEW_POINT |
- _NEW_POLYGON |
- _NEW_PROGRAM |
- _NEW_TRANSFORM,
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_REDUCED_PRIMITIVE |
- BRW_NEW_VUE_MAP_GEOM_OUT))
- return;
-
- /* _NEW_BUFFERS */
- bool flip_y = ctx->DrawBuffer->FlipY;
-
- memset(&key, 0, sizeof(key));
-
- /* Populate the key, noting state dependencies:
- */
- /* BRW_NEW_VUE_MAP_GEOM_OUT */
- key.attrs = brw->vue_map_geom_out.slots_valid;
-
- /* BRW_NEW_REDUCED_PRIMITIVE */
- switch (brw->reduced_primitive) {
- case GL_TRIANGLES:
- /* NOTE: We just use the edgeflag attribute as an indicator that
- * unfilled triangles are active. We don't actually do the
- * edgeflag testing here, it is already done in the clip
- * program.
- */
- if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE))
- key.primitive = BRW_SF_PRIM_UNFILLED_TRIS;
- else
- key.primitive = BRW_SF_PRIM_TRIANGLES;
- break;
- case GL_LINES:
- key.primitive = BRW_SF_PRIM_LINES;
- break;
- case GL_POINTS:
- key.primitive = BRW_SF_PRIM_POINTS;
- break;
- }
-
- /* _NEW_TRANSFORM */
- key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
-
- /* _NEW_POINT */
- key.do_point_sprite = ctx->Point.PointSprite;
- if (key.do_point_sprite) {
- key.point_sprite_coord_replace = ctx->Point.CoordReplace & 0xff;
- }
- if (brw->programs[MESA_SHADER_FRAGMENT]->info.inputs_read &
- BITFIELD64_BIT(VARYING_SLOT_PNTC)) {
- key.do_point_coord = 1;
- }
-
- /*
- * Window coordinates in a FBO are inverted, which means point
- * sprite origin must be inverted, too.
- */
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y)
- key.sprite_origin_lower_left = true;
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
- if (wm_prog_data) {
- key.contains_flat_varying = wm_prog_data->contains_flat_varying;
-
- STATIC_ASSERT(sizeof(key.interp_mode) ==
- sizeof(wm_prog_data->interp_mode));
- memcpy(key.interp_mode, wm_prog_data->interp_mode,
- sizeof(key.interp_mode));
- }
-
- /* _NEW_LIGHT | _NEW_PROGRAM */
- key.do_twoside_color = _mesa_vertex_program_two_side_enabled(ctx);
-
- /* _NEW_POLYGON */
- if (key.do_twoside_color) {
- /* If we're rendering to a FBO, we have to invert the polygon
- * face orientation, just as we invert the viewport in
- * sf_unit_create_from_key().
- */
- key.frontface_ccw = brw->polygon_front_bit != flip_y;
- }
-
- if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, &key, sizeof(key),
- &brw->sf.prog_offset, &brw->sf.prog_data, true)) {
- compile_sf_prog( brw, &key );
- }
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "main/dd.h"
-
-#include "brw_screen.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-
-int
-brw_translate_shadow_compare_func(GLenum func)
-{
- /* GL specifies the result of shadow comparisons as:
- * 1 if ref <op> texel,
- * 0 otherwise.
- *
- * The hardware does:
- * 0 if texel <op> ref,
- * 1 otherwise.
- *
- * So, these look a bit strange because there's both a negation
- * and swapping of the arguments involved.
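- *
- * For example, GL_LESS asks for 1 when (ref < texel).  Programming the
- * hardware with LEQUAL makes it return 0 when (texel <= ref), i.e. 1
- * exactly when (ref < texel), which is the required result.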
- */
- switch (func) {
- case GL_NEVER:
- return BRW_COMPAREFUNCTION_ALWAYS;
- case GL_LESS:
- return BRW_COMPAREFUNCTION_LEQUAL;
- case GL_LEQUAL:
- return BRW_COMPAREFUNCTION_LESS;
- case GL_GREATER:
- return BRW_COMPAREFUNCTION_GEQUAL;
- case GL_GEQUAL:
- return BRW_COMPAREFUNCTION_GREATER;
- case GL_NOTEQUAL:
- return BRW_COMPAREFUNCTION_EQUAL;
- case GL_EQUAL:
- return BRW_COMPAREFUNCTION_NOTEQUAL;
- case GL_ALWAYS:
- return BRW_COMPAREFUNCTION_NEVER;
- }
-
- unreachable("Invalid shadow comparison function.");
-}
-
-int
-brw_translate_compare_func(GLenum func)
-{
- switch (func) {
- case GL_NEVER:
- return BRW_COMPAREFUNCTION_NEVER;
- case GL_LESS:
- return BRW_COMPAREFUNCTION_LESS;
- case GL_LEQUAL:
- return BRW_COMPAREFUNCTION_LEQUAL;
- case GL_GREATER:
- return BRW_COMPAREFUNCTION_GREATER;
- case GL_GEQUAL:
- return BRW_COMPAREFUNCTION_GEQUAL;
- case GL_NOTEQUAL:
- return BRW_COMPAREFUNCTION_NOTEQUAL;
- case GL_EQUAL:
- return BRW_COMPAREFUNCTION_EQUAL;
- case GL_ALWAYS:
- return BRW_COMPAREFUNCTION_ALWAYS;
- }
-
- unreachable("Invalid comparison function.");
-}
-
-int
-brw_translate_stencil_op(GLenum op)
-{
- switch (op) {
- case GL_KEEP:
- return BRW_STENCILOP_KEEP;
- case GL_ZERO:
- return BRW_STENCILOP_ZERO;
- case GL_REPLACE:
- return BRW_STENCILOP_REPLACE;
- case GL_INCR:
- return BRW_STENCILOP_INCRSAT;
- case GL_DECR:
- return BRW_STENCILOP_DECRSAT;
- case GL_INCR_WRAP:
- return BRW_STENCILOP_INCR;
- case GL_DECR_WRAP:
- return BRW_STENCILOP_DECR;
- case GL_INVERT:
- return BRW_STENCILOP_INVERT;
- default:
- return BRW_STENCILOP_ZERO;
- }
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRW_STATE_H
-#define BRW_STATE_H
-
-#include "brw_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum intel_msaa_layout;
-
-extern const struct brw_tracked_state brw_blend_constant_color;
-extern const struct brw_tracked_state brw_clip_unit;
-extern const struct brw_tracked_state brw_vs_pull_constants;
-extern const struct brw_tracked_state brw_tcs_pull_constants;
-extern const struct brw_tracked_state brw_tes_pull_constants;
-extern const struct brw_tracked_state brw_gs_pull_constants;
-extern const struct brw_tracked_state brw_wm_pull_constants;
-extern const struct brw_tracked_state brw_cs_pull_constants;
-extern const struct brw_tracked_state brw_constant_buffer;
-extern const struct brw_tracked_state brw_curbe_offsets;
-extern const struct brw_tracked_state brw_binding_table_pointers;
-extern const struct brw_tracked_state brw_depthbuffer;
-extern const struct brw_tracked_state brw_recalculate_urb_fence;
-extern const struct brw_tracked_state brw_sf_vp;
-extern const struct brw_tracked_state brw_cs_texture_surfaces;
-extern const struct brw_tracked_state brw_vs_ubo_surfaces;
-extern const struct brw_tracked_state brw_vs_image_surfaces;
-extern const struct brw_tracked_state brw_tcs_ubo_surfaces;
-extern const struct brw_tracked_state brw_tcs_image_surfaces;
-extern const struct brw_tracked_state brw_tes_ubo_surfaces;
-extern const struct brw_tracked_state brw_tes_image_surfaces;
-extern const struct brw_tracked_state brw_gs_ubo_surfaces;
-extern const struct brw_tracked_state brw_gs_image_surfaces;
-extern const struct brw_tracked_state brw_renderbuffer_surfaces;
-extern const struct brw_tracked_state brw_renderbuffer_read_surfaces;
-extern const struct brw_tracked_state brw_texture_surfaces;
-extern const struct brw_tracked_state brw_wm_binding_table;
-extern const struct brw_tracked_state brw_gs_binding_table;
-extern const struct brw_tracked_state brw_tes_binding_table;
-extern const struct brw_tracked_state brw_tcs_binding_table;
-extern const struct brw_tracked_state brw_vs_binding_table;
-extern const struct brw_tracked_state brw_wm_ubo_surfaces;
-extern const struct brw_tracked_state brw_wm_image_surfaces;
-extern const struct brw_tracked_state brw_cs_ubo_surfaces;
-extern const struct brw_tracked_state brw_cs_image_surfaces;
-
-extern const struct brw_tracked_state brw_psp_urb_cbs;
-
-extern const struct brw_tracked_state brw_indices;
-extern const struct brw_tracked_state brw_index_buffer;
-extern const struct brw_tracked_state gfx7_cs_push_constants;
-extern const struct brw_tracked_state gfx6_binding_table_pointers;
-extern const struct brw_tracked_state gfx6_gs_binding_table;
-extern const struct brw_tracked_state gfx6_renderbuffer_surfaces;
-extern const struct brw_tracked_state gfx6_sampler_state;
-extern const struct brw_tracked_state gfx6_sol_surface;
-extern const struct brw_tracked_state gfx6_sf_vp;
-extern const struct brw_tracked_state gfx6_urb;
-extern const struct brw_tracked_state gfx7_l3_state;
-extern const struct brw_tracked_state gfx7_push_constant_space;
-extern const struct brw_tracked_state gfx7_urb;
-extern const struct brw_tracked_state gfx8_pma_fix;
-extern const struct brw_tracked_state brw_cs_work_groups_surface;
-
-void gfx4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-void gfx11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset,
- uint64_t imm);
-
-static inline bool
-brw_state_dirty(const struct brw_context *brw,
- GLuint mesa_flags, uint64_t brw_flags)
-{
- return ((brw->NewGLState & mesa_flags) |
- (brw->ctx.NewDriverState & brw_flags)) != 0;
-}
-
-/* brw_binding_tables.c */
-void brw_upload_binding_table(struct brw_context *brw,
- uint32_t packet_name,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state);
-
-/* brw_misc_state.c */
-void brw_upload_invariant_state(struct brw_context *brw);
-uint32_t
-brw_depthbuffer_format(struct brw_context *brw);
-
-/* gfx8_depth_state.c */
-void gfx8_write_pma_stall_bits(struct brw_context *brw,
- uint32_t pma_stall_bits);
-
-/* brw_disk_cache.c */
-void brw_disk_cache_init(struct brw_screen *screen);
-bool brw_disk_cache_upload_program(struct brw_context *brw,
- gl_shader_stage stage);
-void brw_disk_cache_write_compute_program(struct brw_context *brw);
-void brw_disk_cache_write_render_programs(struct brw_context *brw);
-
-/***********************************************************************
- * brw_state_upload.c
- */
-void brw_upload_render_state(struct brw_context *brw);
-void brw_render_state_finished(struct brw_context *brw);
-void brw_upload_compute_state(struct brw_context *brw);
-void brw_compute_state_finished(struct brw_context *brw);
-void brw_init_state(struct brw_context *brw);
-void brw_destroy_state(struct brw_context *brw);
-void brw_emit_select_pipeline(struct brw_context *brw,
- enum brw_pipeline pipeline);
-void brw_enable_obj_preemption(struct brw_context *brw, bool enable);
-
-static inline void
-brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
-{
- if (unlikely(brw->last_pipeline != pipeline)) {
- assert(pipeline < BRW_NUM_PIPELINES);
- brw_emit_select_pipeline(brw, pipeline);
- brw->last_pipeline = pipeline;
- }
-}
-
-/***********************************************************************
- * brw_program_cache.c
- */
-
-void brw_upload_cache(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_sz,
- const void *data,
- GLuint data_sz,
- const void *aux,
- GLuint aux_sz,
- uint32_t *out_offset, void *out_aux);
-
-bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
- const void *key, GLuint key_size, uint32_t *inout_offset,
- void *inout_aux, bool flag_state);
-
-const void *brw_find_previous_compile(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- unsigned program_string_id);
-
-void brw_program_cache_check_size(struct brw_context *brw);
-
-void brw_init_caches( struct brw_context *brw );
-void brw_destroy_caches( struct brw_context *brw );
-
-void brw_print_program_cache(struct brw_context *brw);
-
-enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage);
-
-/* brw_batch.c */
-void brw_require_statebuffer_space(struct brw_context *brw, int size);
-void *brw_state_batch(struct brw_context *brw,
- int size, int alignment, uint32_t *out_offset);
-
-/* brw_wm_surface_state.c */
-uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
-uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
-enum isl_format brw_isl_format_for_mesa_format(mesa_format mesa_format);
-
-GLuint translate_tex_target(GLenum target);
-
-enum isl_format translate_tex_format(struct brw_context *brw,
- mesa_format mesa_format,
- GLenum srgb_decode);
-
-int brw_get_texture_swizzle(const struct gl_context *ctx,
- const struct gl_texture_object *t);
-
-void brw_emit_buffer_surface_state(struct brw_context *brw,
- uint32_t *out_offset,
- struct brw_bo *bo,
- unsigned buffer_offset,
- unsigned surface_format,
- unsigned buffer_size,
- unsigned pitch,
- unsigned reloc_flags);
-
-/* brw_sampler_state.c */
-void brw_emit_sampler_state(struct brw_context *brw,
- uint32_t *sampler_state,
- uint32_t batch_offset_for_sampler_state,
- unsigned min_filter,
- unsigned mag_filter,
- unsigned mip_filter,
- unsigned max_anisotropy,
- unsigned address_rounding,
- unsigned wrap_s,
- unsigned wrap_t,
- unsigned wrap_r,
- unsigned base_level,
- unsigned min_lod,
- unsigned max_lod,
- int lod_bias,
- unsigned shadow_function,
- bool non_normalized_coordinates,
- uint32_t border_color_offset);
-
-/* gfx6_constant_state.c */
-void
-brw_populate_constant_data(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_state *stage_state,
- void *dst,
- const uint32_t *param,
- unsigned nr_params);
-void
-brw_upload_pull_constants(struct brw_context *brw,
- GLbitfield64 brw_new_constbuf,
- const struct gl_program *prog,
- struct brw_stage_state *stage_state,
- const struct brw_stage_prog_data *prog_data);
-void
-brw_upload_cs_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_cs_prog_data *cs_prog_data,
- struct brw_stage_state *stage_state);
-
-/* gfx7_vs_state.c */
-void
-gfx7_upload_constant_state(struct brw_context *brw,
- const struct brw_stage_state *stage_state,
- bool active, unsigned opcode);
-
-/* brw_clip.c */
-void brw_upload_clip_prog(struct brw_context *brw);
-
-/* brw_sf.c */
-void brw_upload_sf_prog(struct brw_context *brw);
-
-bool brw_is_drawing_points(const struct brw_context *brw);
-bool brw_is_drawing_lines(const struct brw_context *brw);
-
-/* gfx7_l3_state.c */
-void
-gfx7_restore_default_l3_config(struct brw_context *brw);
-
-static inline bool
-use_state_point_size(const struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* Section 14.4 (Points) of the OpenGL 4.5 specification says:
- *
- * "If program point size mode is enabled, the derived point size is
- * taken from the (potentially clipped) shader built-in gl_PointSize
- * written by:
- *
- * * the geometry shader, if active;
- * * the tessellation evaluation shader, if active and no
- * geometry shader is active;
- * * the vertex shader, otherwise
- *
- * and clamped to the implementation-dependent point size range. If
- * the value written to gl_PointSize is less than or equal to zero,
- * or if no value was written to gl_PointSize, results are undefined.
- * If program point size mode is disabled, the derived point size is
- * specified with the command
- *
- * void PointSize(float size);
- *
- * size specifies the requested size of a point. The default value
- * is 1.0."
- *
- * The rules for GLES come from the ES 3.2, OES_geometry_point_size, and
- * OES_tessellation_point_size specifications. To summarize: if the last
- * stage before rasterization is a GS or TES, then use gl_PointSize from
- * the shader if written. Otherwise, use 1.0. If the last stage is a
- * vertex shader, use gl_PointSize, or it is undefined.
- *
- * We can combine these rules into a single condition for both APIs.
- * Using the state point size when the last shader stage doesn't write
- * gl_PointSize satisfies GL's requirements, as it's undefined. Because
- * ES doesn't have a PointSize() command, the state point size will
- * remain 1.0, satisfying the ES default value in the GS/TES case, and
- * the VS case (1.0 works for "undefined"). Mesa sets the program point
- * mode flag to always-enabled in ES, so we can safely check that, and
- * it'll be ignored for ES.
- *
- * _NEW_PROGRAM | _NEW_POINT
- * BRW_NEW_VUE_MAP_GEOM_OUT
- */
- return (!ctx->VertexProgram.PointSizeEnabled && !ctx->Point._Attenuated) ||
- (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0;
-}
-
-void brw_copy_pipeline_atoms(struct brw_context *brw,
- enum brw_pipeline pipeline,
- const struct brw_tracked_state **atoms,
- int num_atoms);
-void gfx4_init_atoms(struct brw_context *brw);
-void gfx45_init_atoms(struct brw_context *brw);
-void gfx5_init_atoms(struct brw_context *brw);
-void gfx6_init_atoms(struct brw_context *brw);
-void gfx7_init_atoms(struct brw_context *brw);
-void gfx75_init_atoms(struct brw_context *brw);
-void gfx8_init_atoms(struct brw_context *brw);
-void gfx9_init_atoms(struct brw_context *brw);
-void gfx11_init_atoms(struct brw_context *brw);
-
-static inline uint32_t
-brw_mocs(const struct isl_device *dev, struct brw_bo *bo)
-{
- return isl_mocs(dev, 0, bo && bo->external);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_program.h"
-#include "drivers/common/meta.h"
-#include "brw_batch.h"
-#include "brw_buffers.h"
-#include "brw_vs.h"
-#include "brw_ff_gs.h"
-#include "brw_gs.h"
-#include "brw_wm.h"
-#include "brw_cs.h"
-#include "genxml/genX_bits.h"
-#include "main/framebuffer.h"
-
-void
-brw_enable_obj_preemption(struct brw_context *brw, bool enable)
-{
- ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
- assert(devinfo->ver >= 9);
-
- if (enable == brw->object_preemption)
- return;
-
- /* A fixed function pipe flush is required before modifying this field */
- brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
- bool replay_mode = enable ?
- GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER;
-
- /* enable object level preemption */
- brw_load_register_imm32(brw, CS_CHICKEN1,
- replay_mode | GFX9_REPLAY_MODE_MASK);
-
- brw->object_preemption = enable;
-}
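
The MI_LOAD_REGISTER_IMM write above pairs the field value with its mask (GFX9_REPLAY_MODE_MASK) because these workaround registers treat bits 31:16 as per-bit write enables for bits 15:0, so only the targeted bits change. A minimal standalone sketch of that convention follows; the field layout is invented for illustration and is not a real register definition.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_FIELD (1u << 5)   /* hypothetical bit in the low half of a masked register */

/* Compose the immediate for a masked register write: the upper half tells the
 * hardware which bits to update, the lower half carries their new values. */
static uint32_t
masked_field(uint32_t field, bool set)
{
   return (field << 16) | (set ? field : 0);
}

int main(void)
{
   printf("set:   0x%08x\n", masked_field(EXAMPLE_FIELD, true));   /* 0x00200020 */
   printf("clear: 0x%08x\n", masked_field(EXAMPLE_FIELD, false));  /* 0x00200000 */
   return 0;
}
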
-
-static void
-brw_upload_gfx11_slice_hashing_state(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- int subslices_delta =
- devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
- if (subslices_delta == 0)
- return;
-
- unsigned size = GFX11_SLICE_HASH_TABLE_length * 4;
- uint32_t hash_address;
-
- uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);
-
- unsigned idx = 0;
-
- unsigned sl_small = 0;
- unsigned sl_big = 1;
- if (subslices_delta > 0) {
- sl_small = 1;
- sl_big = 0;
- }
-
- /**
- * Create a 16x16 slice hashing table like the following one:
- *
- * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
- * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
- * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
- * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
- * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
- * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
- * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
- * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
- * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
- * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
- * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
- * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
- * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
- * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
- * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
- * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
- *
- * The table above is used when the pixel pipe 0 has less subslices than
- * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table
- * with 0's and 1's inverted is used.
- */
- for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) {
- uint32_t dw = 0;
-
- for (int j = 0; j < 8; j++) {
- unsigned slice = idx++ % 3 ? sl_big : sl_small;
- dw |= slice << (j * 4);
- }
- map[i] = dw;
- }
-
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
- OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
- ADVANCE_BATCH();
-
- /* From gfx10/gfx11 workaround table in h/w specs:
- *
- * "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
- * a value of 0xFFFF"
- *
- * This means that whenever we update a field with this instruction, we need
- * to update all the others.
- *
- * Since this is the first time we emit this
- * instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag,
- * and leaving everything else at their default state (0).
- */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
- OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE);
- ADVANCE_BATCH();
-}
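
The table build above packs eight 4-bit slice indices into each DWord, with two out of every three entries pointing at the pixel pipe that has more subslices. Below is a self-contained sketch of the same packing plus a dump of the resulting 16x16 pattern; the 32-DWord table size is an assumption derived from 256 four-bit entries fitting in GFX11_SLICE_HASH_TABLE_length * 4 bytes.

#include <stdint.h>
#include <stdio.h>

#define TABLE_DWORDS 32   /* assumed: 16x16 entries at 4 bits each */

int main(void)
{
   uint32_t table[TABLE_DWORDS];
   unsigned idx = 0;
   const unsigned sl_small = 0, sl_big = 1;

   for (int i = 0; i < TABLE_DWORDS; i++) {
      uint32_t dw = 0;
      for (int j = 0; j < 8; j++) {
         /* Two out of every three entries select the bigger pixel pipe. */
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      table[i] = dw;
   }

   /* Dump one 16-entry row per line (two DWords per row). */
   for (int row = 0; row < 16; row++) {
      for (int e = 0; e < 16; e++) {
         unsigned dw = row * 2 + e / 8;
         printf("%u ", (table[dw] >> ((e % 8) * 4)) & 0xf);
      }
      printf("\n");
   }
   return 0;
}
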
-
-static void
-brw_upload_initial_gpu_state(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct brw_compiler *compiler = brw->screen->compiler;
-
- /* On platforms with hardware contexts, we can set our initial GPU state
- * right away rather than doing it via state atoms. This saves a small
- * amount of overhead on every draw call.
- */
- if (!brw->hw_ctx)
- return;
-
- if (devinfo->ver == 6)
- brw_emit_post_sync_nonzero_flush(brw);
-
- brw_upload_invariant_state(brw);
-
- if (devinfo->ver == 11) {
- /* The default behavior of bit 5 "Headerless Message for Pre-emptable
- * Contexts" in SAMPLER MODE register is set to 0, which means
- * headerless sampler messages are not allowed for pre-emptable
- * contexts. Set bit 5 to 1 to allow them.
- */
- brw_load_register_imm32(brw, GFX11_SAMPLER_MODE,
- HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
- HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
-
- /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
- * HALF_SLICE_CHICKEN7 register.
- */
- brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
- TEXEL_OFFSET_FIX_MASK |
- TEXEL_OFFSET_FIX_ENABLE);
-
- /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set
- * in L3CNTLREG register. The default setting of the bit is not the
- * desirable behavior.
- */
- brw_load_register_imm32(brw, GFX8_L3CNTLREG,
- GFX8_L3CNTLREG_EDBC_NO_HANG);
- }
-
- /* The hardware specification recommends disabling repacking for
- * compatibility with the decompression mechanism in the display controller.
- */
- if (devinfo->disable_ccs_repack) {
- brw_load_register_imm32(brw, GFX7_CACHE_MODE_0,
- GFX11_DISABLE_REPACKING_FOR_COMPRESSION |
- REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION));
- }
-
- if (devinfo->ver == 9) {
- /* Recommended optimizations for Victim Cache eviction and floating
- * point blending.
- */
- brw_load_register_imm32(brw, GFX7_CACHE_MODE_1,
- REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
- REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) |
- REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
- GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
- GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT |
- GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC);
- }
-
- if (devinfo->ver >= 8) {
- gfx8_emit_3dstate_sample_pattern(brw);
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-
- /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
- * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
- *
- * This is only safe on kernels with context isolation support.
- */
- if (!compiler->constant_buffer_0_is_relative) {
- if (devinfo->ver >= 9) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(CS_DEBUG_MODE2);
- OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
- CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
- ADVANCE_BATCH();
- } else if (devinfo->ver == 8) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
- OUT_BATCH(INSTPM);
- OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
- INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
- ADVANCE_BATCH();
- }
- }
-
- brw->object_preemption = false;
-
- if (devinfo->ver >= 10)
- brw_enable_obj_preemption(brw, true);
-
- if (devinfo->ver == 11)
- brw_upload_gfx11_slice_hashing_state(brw);
-}
-
-static inline const struct brw_tracked_state *
-brw_get_pipeline_atoms(struct brw_context *brw,
- enum brw_pipeline pipeline)
-{
- switch (pipeline) {
- case BRW_RENDER_PIPELINE:
- return brw->render_atoms;
- case BRW_COMPUTE_PIPELINE:
- return brw->compute_atoms;
- default:
- STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
- unreachable("Unsupported pipeline");
- return NULL;
- }
-}
-
-void
-brw_copy_pipeline_atoms(struct brw_context *brw,
- enum brw_pipeline pipeline,
- const struct brw_tracked_state **atoms,
- int num_atoms)
-{
- /* This is to work around brw_context::atoms being declared const. We want
- * it to be const, but it needs to be initialized somehow!
- */
- struct brw_tracked_state *context_atoms =
- (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
-
- for (int i = 0; i < num_atoms; i++) {
- context_atoms[i] = *atoms[i];
- assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
- assert(context_atoms[i].emit);
- }
-
- brw->num_atoms[pipeline] = num_atoms;
-}
-
-void brw_init_state( struct brw_context *brw )
-{
- struct gl_context *ctx = &brw->ctx;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Force the first brw_select_pipeline to emit pipeline select */
- brw->last_pipeline = BRW_NUM_PIPELINES;
-
- brw_init_caches(brw);
-
- if (devinfo->ver >= 11)
- gfx11_init_atoms(brw);
- else if (devinfo->ver >= 10)
- unreachable("Gfx10 support dropped.");
- else if (devinfo->ver >= 9)
- gfx9_init_atoms(brw);
- else if (devinfo->ver >= 8)
- gfx8_init_atoms(brw);
- else if (devinfo->verx10 >= 75)
- gfx75_init_atoms(brw);
- else if (devinfo->ver >= 7)
- gfx7_init_atoms(brw);
- else if (devinfo->ver >= 6)
- gfx6_init_atoms(brw);
- else if (devinfo->ver >= 5)
- gfx5_init_atoms(brw);
- else if (devinfo->verx10 >= 45)
- gfx45_init_atoms(brw);
- else
- gfx4_init_atoms(brw);
-
- brw_upload_initial_gpu_state(brw);
-
- brw->NewGLState = ~0;
- brw->ctx.NewDriverState = ~0ull;
-
- /* ~0 is a nonsensical value which won't match anything we program, so
- * the programming will take effect on the first time around.
- */
- brw->pma_stall_bits = ~0;
-
- /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
- * dirty flags.
- */
- STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
-
- ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
- ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
- ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
- ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
- ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
- ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
- ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
- ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
- ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS;
- ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
-}
-
-
-void brw_destroy_state( struct brw_context *brw )
-{
- brw_destroy_caches(brw);
-}
-
-/***********************************************************************
- */
-
-static bool
-check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
-{
- return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
-}
-
-static void
-accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
-{
- a->mesa |= b->mesa;
- a->brw |= b->brw;
-}
-
-
-static void
-xor_states(struct brw_state_flags *result,
- const struct brw_state_flags *a,
- const struct brw_state_flags *b)
-{
- result->mesa = a->mesa ^ b->mesa;
- result->brw = a->brw ^ b->brw;
-}
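
These three helpers treat the mesa/brw flag pair as a single bit set: check_state() tests for intersection (does an atom care about anything currently dirty), accumulate_state() takes the union, and xor_states() recovers what changed between two snapshots. A tiny self-contained illustration of the first two, with invented flag values:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same shape as brw_state_flags: a mask of core-Mesa _NEW_* bits and a mask
 * of driver BRW_NEW_* bits.  The bit values below are made up. */
struct flags { uint32_t mesa; uint64_t brw; };

static bool check(const struct flags *a, const struct flags *b)
{ return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0; }

static void accumulate(struct flags *a, const struct flags *b)
{ a->mesa |= b->mesa; a->brw |= b->brw; }

int main(void)
{
   struct flags dirty = { .mesa = 0x1, .brw = 0x0 };        /* one core bit is dirty */
   struct flags atom  = { .mesa = 0x1 | 0x4, .brw = 0x8 };  /* what one atom listens to */
   struct flags seen  = { 0, 0 };

   assert(check(&dirty, &atom));   /* overlap -> this atom would be re-emitted */
   accumulate(&seen, &atom);       /* remember which flags have been examined */
   assert(seen.mesa == (0x1 | 0x4) && seen.brw == 0x8);
   return 0;
}
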
-
-struct dirty_bit_map {
- uint64_t bit;
- char *name;
- uint32_t count;
-};
-
-#define DEFINE_BIT(name) {name, #name, 0}
-
-static struct dirty_bit_map mesa_bits[] = {
- DEFINE_BIT(_NEW_MODELVIEW),
- DEFINE_BIT(_NEW_PROJECTION),
- DEFINE_BIT(_NEW_TEXTURE_MATRIX),
- DEFINE_BIT(_NEW_COLOR),
- DEFINE_BIT(_NEW_DEPTH),
- DEFINE_BIT(_NEW_FOG),
- DEFINE_BIT(_NEW_HINT),
- DEFINE_BIT(_NEW_LIGHT),
- DEFINE_BIT(_NEW_LINE),
- DEFINE_BIT(_NEW_PIXEL),
- DEFINE_BIT(_NEW_POINT),
- DEFINE_BIT(_NEW_POLYGON),
- DEFINE_BIT(_NEW_POLYGONSTIPPLE),
- DEFINE_BIT(_NEW_SCISSOR),
- DEFINE_BIT(_NEW_STENCIL),
- DEFINE_BIT(_NEW_TEXTURE_OBJECT),
- DEFINE_BIT(_NEW_TRANSFORM),
- DEFINE_BIT(_NEW_VIEWPORT),
- DEFINE_BIT(_NEW_TEXTURE_STATE),
- DEFINE_BIT(_NEW_RENDERMODE),
- DEFINE_BIT(_NEW_BUFFERS),
- DEFINE_BIT(_NEW_CURRENT_ATTRIB),
- DEFINE_BIT(_NEW_MULTISAMPLE),
- DEFINE_BIT(_NEW_TRACK_MATRIX),
- DEFINE_BIT(_NEW_PROGRAM),
- DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
- DEFINE_BIT(_NEW_FRAG_CLAMP),
- {0, 0, 0}
-};
-
-static struct dirty_bit_map brw_bits[] = {
- DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
- DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
- DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
- DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
- DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
- DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
- DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
- DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
- DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
- DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
- DEFINE_BIT(BRW_NEW_URB_FENCE),
- DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
- DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
- DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
- DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
- DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
- DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
- DEFINE_BIT(BRW_NEW_PRIMITIVE),
- DEFINE_BIT(BRW_NEW_CONTEXT),
- DEFINE_BIT(BRW_NEW_PSP),
- DEFINE_BIT(BRW_NEW_SURFACES),
- DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
- DEFINE_BIT(BRW_NEW_INDICES),
- DEFINE_BIT(BRW_NEW_VERTICES),
- DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
- DEFINE_BIT(BRW_NEW_BATCH),
- DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
- DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
- DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
- DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
- DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
- DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
- DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
- DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
- DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
- DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
- DEFINE_BIT(BRW_NEW_STATS_WM),
- DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
- DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
- DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
- DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
- DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
- DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
- DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE),
- DEFINE_BIT(BRW_NEW_CC_VP),
- DEFINE_BIT(BRW_NEW_SF_VP),
- DEFINE_BIT(BRW_NEW_CLIP_VP),
- DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
- DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
- DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
- DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
- DEFINE_BIT(BRW_NEW_URB_SIZE),
- DEFINE_BIT(BRW_NEW_CC_STATE),
- DEFINE_BIT(BRW_NEW_BLORP),
- DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
- DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
- DEFINE_BIT(BRW_NEW_DRAW_CALL),
- DEFINE_BIT(BRW_NEW_AUX_STATE),
- {0, 0, 0}
-};
-
-static void
-brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
-{
- for (int i = 0; bit_map[i].bit != 0; i++) {
- if (bit_map[i].bit & bits)
- bit_map[i].count++;
- }
-}
-
-static void
-brw_print_dirty_count(struct dirty_bit_map *bit_map)
-{
- for (int i = 0; bit_map[i].bit != 0; i++) {
- if (bit_map[i].count > 1) {
- fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
- bit_map[i].bit, bit_map[i].count, bit_map[i].name);
- }
- }
-}
-
-static inline void
-brw_upload_tess_programs(struct brw_context *brw)
-{
- if (brw->programs[MESA_SHADER_TESS_EVAL]) {
- brw_upload_tcs_prog(brw);
- brw_upload_tes_prog(brw);
- } else {
- brw->tcs.base.prog_data = NULL;
- brw->tes.base.prog_data = NULL;
- }
-}
-
-static inline void
-brw_upload_programs(struct brw_context *brw,
- enum brw_pipeline pipeline)
-{
- struct gl_context *ctx = &brw->ctx;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (pipeline == BRW_RENDER_PIPELINE) {
- brw_upload_vs_prog(brw);
- brw_upload_tess_programs(brw);
-
- if (brw->programs[MESA_SHADER_GEOMETRY]) {
- brw_upload_gs_prog(brw);
- } else {
- brw->gs.base.prog_data = NULL;
- if (devinfo->ver < 7)
- brw_upload_ff_gs_prog(brw);
- }
-
- /* Update the VUE map for data exiting the GS stage of the pipeline.
- * This comes from the last enabled shader stage.
- */
- GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
- bool old_separate = brw->vue_map_geom_out.separate;
- struct brw_vue_prog_data *vue_prog_data;
- if (brw->programs[MESA_SHADER_GEOMETRY])
- vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
- else if (brw->programs[MESA_SHADER_TESS_EVAL])
- vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
- else
- vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
-
- brw->vue_map_geom_out = vue_prog_data->vue_map;
-
- /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
- if (old_slots != brw->vue_map_geom_out.slots_valid ||
- old_separate != brw->vue_map_geom_out.separate)
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-
- if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
- VARYING_BIT_VIEWPORT) {
- ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
- brw->clip.viewport_count =
- (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
- ctx->Const.MaxViewports : 1;
- }
-
- brw_upload_wm_prog(brw);
-
- if (devinfo->ver < 6) {
- brw_upload_clip_prog(brw);
- brw_upload_sf_prog(brw);
- }
-
- brw_disk_cache_write_render_programs(brw);
- } else if (pipeline == BRW_COMPUTE_PIPELINE) {
- brw_upload_cs_prog(brw);
- brw_disk_cache_write_compute_program(brw);
- }
-}
-
-static inline void
-merge_ctx_state(struct brw_context *brw,
- struct brw_state_flags *state)
-{
- state->mesa |= brw->NewGLState;
- state->brw |= brw->ctx.NewDriverState;
-}
-
-static ALWAYS_INLINE void
-check_and_emit_atom(struct brw_context *brw,
- struct brw_state_flags *state,
- const struct brw_tracked_state *atom)
-{
- if (check_state(state, &atom->dirty)) {
- atom->emit(brw);
- merge_ctx_state(brw, state);
- }
-}
-
-static inline void
-brw_upload_pipeline_state(struct brw_context *brw,
- enum brw_pipeline pipeline)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- int i;
- static int dirty_count = 0;
- struct brw_state_flags state = brw->state.pipelines[pipeline];
- const unsigned fb_samples =
- MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
-
- brw_select_pipeline(brw, pipeline);
-
- if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
- brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);
-
- if (INTEL_DEBUG(DEBUG_REEMIT)) {
- /* Always re-emit all state. */
- brw->NewGLState = ~0;
- ctx->NewDriverState = ~0ull;
- }
-
- if (pipeline == BRW_RENDER_PIPELINE) {
- if (brw->programs[MESA_SHADER_FRAGMENT] !=
- ctx->FragmentProgram._Current) {
- brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
- }
-
- if (brw->programs[MESA_SHADER_TESS_EVAL] !=
- ctx->TessEvalProgram._Current) {
- brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
- }
-
- if (brw->programs[MESA_SHADER_TESS_CTRL] !=
- ctx->TessCtrlProgram._Current) {
- brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
- }
-
- if (brw->programs[MESA_SHADER_GEOMETRY] !=
- ctx->GeometryProgram._Current) {
- brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
- }
-
- if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
- brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
- }
- }
-
- if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
- brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
- }
-
- if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
- brw->meta_in_progress = _mesa_meta_in_progress(ctx);
- brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
- }
-
- if (brw->num_samples != fb_samples) {
- brw->num_samples = fb_samples;
- brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
- }
-
- /* Exit early if no state is flagged as dirty */
- merge_ctx_state(brw, &state);
- if ((state.mesa | state.brw) == 0)
- return;
-
- /* Emit Sandybridge workaround flushes on every primitive, for safety. */
- if (devinfo->ver == 6)
- brw_emit_post_sync_nonzero_flush(brw);
-
- brw_upload_programs(brw, pipeline);
- merge_ctx_state(brw, &state);
-
- brw->vtbl.emit_state_base_address(brw);
-
- const struct brw_tracked_state *atoms =
- brw_get_pipeline_atoms(brw, pipeline);
- const int num_atoms = brw->num_atoms[pipeline];
-
- if (INTEL_DEBUG(DEBUG_ANY)) {
- /* Debug version which enforces various sanity checks on the
- * state flags which are generated and checked to help ensure
- * state atoms are ordered correctly in the list.
- */
- struct brw_state_flags examined, prev;
- memset(&examined, 0, sizeof(examined));
- prev = state;
-
- for (i = 0; i < num_atoms; i++) {
- const struct brw_tracked_state *atom = &atoms[i];
- struct brw_state_flags generated;
-
- check_and_emit_atom(brw, &state, atom);
-
- accumulate_state(&examined, &atom->dirty);
-
- /* generated = (prev ^ state)
- * if (examined & generated)
- * fail;
- */
- xor_states(&generated, &prev, &state);
- assert(!check_state(&examined, &generated));
- prev = state;
- }
- }
- else {
- for (i = 0; i < num_atoms; i++) {
- const struct brw_tracked_state *atom = &atoms[i];
-
- check_and_emit_atom(brw, &state, atom);
- }
- }
-
- if (INTEL_DEBUG(DEBUG_STATE)) {
- STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
-
- brw_update_dirty_count(mesa_bits, state.mesa);
- brw_update_dirty_count(brw_bits, state.brw);
- if (dirty_count++ % 1000 == 0) {
- brw_print_dirty_count(mesa_bits);
- brw_print_dirty_count(brw_bits);
- fprintf(stderr, "\n");
- }
- }
-}
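
In the DEBUG_ANY branch above, generated = prev ^ state is the set of flags the just-emitted atom raised, and the assert fires if any atom examined so far listens for one of those flags, i.e. the atom list is mis-ordered. A toy version of that check, using a single flag word instead of the mesa/brw pair and invented bit values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint64_t state = 0x1;      /* dirty flags before the atom ran */
   uint64_t examined = 0x2;   /* union of the dirty masks of atoms handled so far */
   uint64_t prev = state;

   state |= 0x4;              /* the atom we just emitted raised flag 0x4 */

   uint64_t generated = prev ^ state;
   assert((examined & generated) == 0);   /* no earlier atom cared about 0x4: order is fine */
   return 0;
}
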
-
-/***********************************************************************
- * Emit all state:
- */
-void brw_upload_render_state(struct brw_context *brw)
-{
- brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
-}
-
-static inline void
-brw_pipeline_state_finished(struct brw_context *brw,
- enum brw_pipeline pipeline)
-{
- /* Save all dirty state into the other pipelines */
- for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
- if (i != pipeline) {
- brw->state.pipelines[i].mesa |= brw->NewGLState;
- brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
- } else {
- memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
- }
- }
-
- brw->NewGLState = 0;
- brw->ctx.NewDriverState = 0ull;
-}
-
-/**
- * Clear dirty bits to account for the fact that the state emitted by
- * brw_upload_render_state() has been committed to the hardware. This is a
- * separate call from brw_upload_render_state() because it's possible that
- * after the call to brw_upload_render_state(), we will discover that we've
- * run out of aperture space, and need to rewind the batch buffer to the state
- * it had before the brw_upload_render_state() call.
- */
-void
-brw_render_state_finished(struct brw_context *brw)
-{
- brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
-}
-
-void
-brw_upload_compute_state(struct brw_context *brw)
-{
- brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
-}
-
-void
-brw_compute_state_finished(struct brw_context *brw)
-{
- brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRW_STRUCTS_H
-#define BRW_STRUCTS_H
-
-struct brw_urb_fence
-{
- struct
- {
- unsigned length:8;
- unsigned vs_realloc:1;
- unsigned gs_realloc:1;
- unsigned clp_realloc:1;
- unsigned sf_realloc:1;
- unsigned vfe_realloc:1;
- unsigned cs_realloc:1;
- unsigned pad:2;
- unsigned opcode:16;
- } header;
-
- struct
- {
- unsigned vs_fence:10;
- unsigned gs_fence:10;
- unsigned clp_fence:10;
- unsigned pad:2;
- } bits0;
-
- struct
- {
- unsigned sf_fence:10;
- unsigned vf_fence:10;
- unsigned cs_fence:11;
- unsigned pad:1;
- } bits1;
-};
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-#include "main/mtypes.h"
-
-#include "isl/isl.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-
-enum isl_format
-brw_isl_format_for_mesa_format(mesa_format mesa_format)
-{
- /* This table is ordered according to the enum ordering in formats.h. We
- * expect that enum to be extended without this table's explicit entries
- * staying in sync, so every entry defaults to ISL_FORMAT_UNSUPPORTED
- * rather than to 0, which would alias ISL_FORMAT_R32G32B32A32_FLOAT.
- */
- static const enum isl_format table[MESA_FORMAT_COUNT] = {
- [0 ... MESA_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
-
- [MESA_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
- [MESA_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
- [MESA_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
- [MESA_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
- [MESA_FORMAT_RGB_UNORM8] = ISL_FORMAT_R8G8B8_UNORM,
- [MESA_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
- [MESA_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
- [MESA_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
- [MESA_FORMAT_LA_UNORM8] = ISL_FORMAT_L8A8_UNORM,
- [MESA_FORMAT_LA_UNORM16] = ISL_FORMAT_L16A16_UNORM,
- [MESA_FORMAT_A_UNORM8] = ISL_FORMAT_A8_UNORM,
- [MESA_FORMAT_A_UNORM16] = ISL_FORMAT_A16_UNORM,
- [MESA_FORMAT_L_UNORM8] = ISL_FORMAT_L8_UNORM,
- [MESA_FORMAT_L_UNORM16] = ISL_FORMAT_L16_UNORM,
- [MESA_FORMAT_I_UNORM8] = ISL_FORMAT_I8_UNORM,
- [MESA_FORMAT_I_UNORM16] = ISL_FORMAT_I16_UNORM,
- [MESA_FORMAT_YCBCR_REV] = ISL_FORMAT_YCRCB_NORMAL,
- [MESA_FORMAT_YCBCR] = ISL_FORMAT_YCRCB_SWAPUVY,
- [MESA_FORMAT_R_UNORM8] = ISL_FORMAT_R8_UNORM,
- [MESA_FORMAT_RG_UNORM8] = ISL_FORMAT_R8G8_UNORM,
- [MESA_FORMAT_R_UNORM16] = ISL_FORMAT_R16_UNORM,
- [MESA_FORMAT_RG_UNORM16] = ISL_FORMAT_R16G16_UNORM,
- [MESA_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
- [MESA_FORMAT_S_UINT8] = ISL_FORMAT_R8_UINT,
-
- [MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
- [MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
- [MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
- [MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
- [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
- [MESA_FORMAT_LA_SRGB8] = ISL_FORMAT_L8A8_UNORM_SRGB,
- [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
- [MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
- [MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
- [MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,
-
- [MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
- [MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
- [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
- [MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
- [MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
- [MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
-
- [MESA_FORMAT_RGBA_FLOAT32] = ISL_FORMAT_R32G32B32A32_FLOAT,
- [MESA_FORMAT_RGBA_FLOAT16] = ISL_FORMAT_R16G16B16A16_FLOAT,
- [MESA_FORMAT_RGB_FLOAT32] = ISL_FORMAT_R32G32B32_FLOAT,
- [MESA_FORMAT_A_FLOAT32] = ISL_FORMAT_A32_FLOAT,
- [MESA_FORMAT_A_FLOAT16] = ISL_FORMAT_A16_FLOAT,
- [MESA_FORMAT_L_FLOAT32] = ISL_FORMAT_L32_FLOAT,
- [MESA_FORMAT_L_FLOAT16] = ISL_FORMAT_L16_FLOAT,
- [MESA_FORMAT_LA_FLOAT32] = ISL_FORMAT_L32A32_FLOAT,
- [MESA_FORMAT_LA_FLOAT16] = ISL_FORMAT_L16A16_FLOAT,
- [MESA_FORMAT_I_FLOAT32] = ISL_FORMAT_I32_FLOAT,
- [MESA_FORMAT_I_FLOAT16] = ISL_FORMAT_I16_FLOAT,
- [MESA_FORMAT_R_FLOAT32] = ISL_FORMAT_R32_FLOAT,
- [MESA_FORMAT_R_FLOAT16] = ISL_FORMAT_R16_FLOAT,
- [MESA_FORMAT_RG_FLOAT32] = ISL_FORMAT_R32G32_FLOAT,
- [MESA_FORMAT_RG_FLOAT16] = ISL_FORMAT_R16G16_FLOAT,
-
- [MESA_FORMAT_R_SINT8] = ISL_FORMAT_R8_SINT,
- [MESA_FORMAT_RG_SINT8] = ISL_FORMAT_R8G8_SINT,
- [MESA_FORMAT_RGB_SINT8] = ISL_FORMAT_R8G8B8_SINT,
- [MESA_FORMAT_RGBA_SINT8] = ISL_FORMAT_R8G8B8A8_SINT,
- [MESA_FORMAT_R_SINT16] = ISL_FORMAT_R16_SINT,
- [MESA_FORMAT_RG_SINT16] = ISL_FORMAT_R16G16_SINT,
- [MESA_FORMAT_RGB_SINT16] = ISL_FORMAT_R16G16B16_SINT,
- [MESA_FORMAT_RGBA_SINT16] = ISL_FORMAT_R16G16B16A16_SINT,
- [MESA_FORMAT_R_SINT32] = ISL_FORMAT_R32_SINT,
- [MESA_FORMAT_RG_SINT32] = ISL_FORMAT_R32G32_SINT,
- [MESA_FORMAT_RGB_SINT32] = ISL_FORMAT_R32G32B32_SINT,
- [MESA_FORMAT_RGBA_SINT32] = ISL_FORMAT_R32G32B32A32_SINT,
-
- [MESA_FORMAT_R_UINT8] = ISL_FORMAT_R8_UINT,
- [MESA_FORMAT_RG_UINT8] = ISL_FORMAT_R8G8_UINT,
- [MESA_FORMAT_RGB_UINT8] = ISL_FORMAT_R8G8B8_UINT,
- [MESA_FORMAT_RGBA_UINT8] = ISL_FORMAT_R8G8B8A8_UINT,
- [MESA_FORMAT_R_UINT16] = ISL_FORMAT_R16_UINT,
- [MESA_FORMAT_RG_UINT16] = ISL_FORMAT_R16G16_UINT,
- [MESA_FORMAT_RGB_UINT16] = ISL_FORMAT_R16G16B16_UINT,
- [MESA_FORMAT_RGBA_UINT16] = ISL_FORMAT_R16G16B16A16_UINT,
- [MESA_FORMAT_R_UINT32] = ISL_FORMAT_R32_UINT,
- [MESA_FORMAT_RG_UINT32] = ISL_FORMAT_R32G32_UINT,
- [MESA_FORMAT_RGB_UINT32] = ISL_FORMAT_R32G32B32_UINT,
- [MESA_FORMAT_RGBA_UINT32] = ISL_FORMAT_R32G32B32A32_UINT,
-
- [MESA_FORMAT_R_SNORM8] = ISL_FORMAT_R8_SNORM,
- [MESA_FORMAT_RG_SNORM8] = ISL_FORMAT_R8G8_SNORM,
- [MESA_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM,
- [MESA_FORMAT_R_SNORM16] = ISL_FORMAT_R16_SNORM,
- [MESA_FORMAT_RG_SNORM16] = ISL_FORMAT_R16G16_SNORM,
- [MESA_FORMAT_RGB_SNORM16] = ISL_FORMAT_R16G16B16_SNORM,
- [MESA_FORMAT_RGBA_SNORM16] = ISL_FORMAT_R16G16B16A16_SNORM,
- [MESA_FORMAT_RGBA_UNORM16] = ISL_FORMAT_R16G16B16A16_UNORM,
-
- [MESA_FORMAT_R_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM,
- [MESA_FORMAT_R_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM,
- [MESA_FORMAT_RG_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM,
- [MESA_FORMAT_RG_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM,
-
- [MESA_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8,
- [MESA_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8,
- [MESA_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8,
- [MESA_FORMAT_ETC2_RGBA8_EAC] = ISL_FORMAT_ETC2_EAC_RGBA8,
- [MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = ISL_FORMAT_ETC2_EAC_SRGB8_A8,
- [MESA_FORMAT_ETC2_R11_EAC] = ISL_FORMAT_EAC_R11,
- [MESA_FORMAT_ETC2_RG11_EAC] = ISL_FORMAT_EAC_RG11,
- [MESA_FORMAT_ETC2_SIGNED_R11_EAC] = ISL_FORMAT_EAC_SIGNED_R11,
- [MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = ISL_FORMAT_EAC_SIGNED_RG11,
- [MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_RGB8_PTA,
- [MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_SRGB8_PTA,
-
- [MESA_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM,
- [MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM] = ISL_FORMAT_BC7_UNORM_SRGB,
- [MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = ISL_FORMAT_BC6H_SF16,
- [MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = ISL_FORMAT_BC6H_UF16,
-
- [MESA_FORMAT_RGBA_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16,
- [MESA_FORMAT_RGBA_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16,
- [MESA_FORMAT_RGBA_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16,
- [MESA_FORMAT_RGBA_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16,
- [MESA_FORMAT_RGBA_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16,
- [MESA_FORMAT_RGBA_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16,
- [MESA_FORMAT_RGBA_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16,
- [MESA_FORMAT_RGBA_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16,
- [MESA_FORMAT_RGBA_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16,
- [MESA_FORMAT_RGBA_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16,
- [MESA_FORMAT_RGBA_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16,
- [MESA_FORMAT_RGBA_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16,
- [MESA_FORMAT_RGBA_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16,
- [MESA_FORMAT_RGBA_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
- [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,
-
- [MESA_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
- [MESA_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
-
- [MESA_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
- [MESA_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
- [MESA_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,
-
- [MESA_FORMAT_B5G5R5X1_UNORM] = ISL_FORMAT_B5G5R5X1_UNORM,
- [MESA_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
- [MESA_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
- [MESA_FORMAT_RGBX_UNORM16] = ISL_FORMAT_R16G16B16X16_UNORM,
- [MESA_FORMAT_RGBX_FLOAT16] = ISL_FORMAT_R16G16B16X16_FLOAT,
- [MESA_FORMAT_RGBX_FLOAT32] = ISL_FORMAT_R32G32B32X32_FLOAT,
- };
-
- assert(mesa_format < MESA_FORMAT_COUNT);
- return table[mesa_format];
-}
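
The table above leans on C99 designated initializers plus the GNU "[first ... last] =" range extension so that every format not listed gets an explicit "unsupported" value instead of silently aliasing entry 0. A minimal standalone version of the same pattern, with invented enum values:

#include <stdio.h>

enum toy_isl_format { TOY_RGBA32_FLOAT = 0, TOY_R8_UNORM = 17, TOY_UNSUPPORTED = 0xffff };
enum toy_mesa_format { TOY_MESA_NONE, TOY_MESA_R_UNORM8, TOY_MESA_COUNT = 8 };

static const enum toy_isl_format table[TOY_MESA_COUNT] = {
   [0 ... TOY_MESA_COUNT - 1] = TOY_UNSUPPORTED,   /* GNU range extension (gcc/clang) */
   [TOY_MESA_R_UNORM8] = TOY_R8_UNORM,             /* later entries override the default */
};

int main(void)
{
   printf("%d %d\n", table[TOY_MESA_R_UNORM8], table[TOY_MESA_NONE]);   /* 17 65535 */
   return 0;
}
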
-
-void
-brw_screen_init_surface_formats(struct brw_screen *screen)
-{
- const struct intel_device_info *devinfo = &screen->devinfo;
- mesa_format format;
-
- memset(&screen->mesa_format_supports_texture, 0,
- sizeof(screen->mesa_format_supports_texture));
-
- for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
- if (!_mesa_get_format_name(format))
- continue;
- enum isl_format texture, render;
- bool is_integer = _mesa_is_format_integer_color(format);
-
- render = texture = brw_isl_format_for_mesa_format(format);
-
- /* Z16 is only exposed with EXT_memory_object_* support, which is
- * not available on older gens.
- */
- if (devinfo->ver < 7 && format == MESA_FORMAT_Z_UNORM16)
- continue;
-
- if (texture == ISL_FORMAT_UNSUPPORTED)
- continue;
-
- /* Don't advertise 8 and 16-bit RGB formats to core mesa. This ensures
- * that they are renderable from an API perspective since core mesa will
- * fall back to RGBA or RGBX (we can't render to non-power-of-two
- * formats). For 8-bit formats, this also keeps us from hitting some
- * nasty corners in brw_miptree_map_blit if you ever try to map one.
- */
- int format_size = _mesa_get_format_bytes(format);
- if (format_size == 3 || format_size == 6)
- continue;
-
- if (isl_format_supports_sampling(devinfo, texture) &&
- (isl_format_supports_filtering(devinfo, texture) || is_integer))
- screen->mesa_format_supports_texture[format] = true;
-
- /* Re-map some render target formats to make them supported when they
- * wouldn't be using their format for texturing.
- */
- switch (render) {
- /* For these formats, we just need to read/write the first
- * channel into R, which is to say that we just treat them as
- * GL_RED.
- */
- case ISL_FORMAT_I32_FLOAT:
- case ISL_FORMAT_L32_FLOAT:
- render = ISL_FORMAT_R32_FLOAT;
- break;
- case ISL_FORMAT_I16_FLOAT:
- case ISL_FORMAT_L16_FLOAT:
- render = ISL_FORMAT_R16_FLOAT;
- break;
- case ISL_FORMAT_I8_UNORM:
- case ISL_FORMAT_L8_UNORM:
- render = ISL_FORMAT_R8_UNORM;
- break;
- case ISL_FORMAT_I16_UNORM:
- case ISL_FORMAT_L16_UNORM:
- render = ISL_FORMAT_R16_UNORM;
- break;
- case ISL_FORMAT_R16G16B16X16_UNORM:
- render = ISL_FORMAT_R16G16B16A16_UNORM;
- break;
- case ISL_FORMAT_R16G16B16X16_FLOAT:
- render = ISL_FORMAT_R16G16B16A16_FLOAT;
- break;
- case ISL_FORMAT_B8G8R8X8_UNORM:
- /* XRGB is handled as ARGB because the chips in this family
- * cannot render to XRGB targets. This means that we have to
- * mask writes to alpha (ala glColorMask) and reconfigure the
- * alpha blending hardware to use GL_ONE (or GL_ZERO) for
- * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
- * used. On Gfx8+ BGRX is actually allowed (but not RGBX).
- */
- if (!isl_format_supports_rendering(devinfo, texture))
- render = ISL_FORMAT_B8G8R8A8_UNORM;
- break;
- case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
- if (!isl_format_supports_rendering(devinfo, texture))
- render = ISL_FORMAT_B8G8R8A8_UNORM_SRGB;
- break;
- case ISL_FORMAT_R8G8B8X8_UNORM:
- render = ISL_FORMAT_R8G8B8A8_UNORM;
- break;
- case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
- render = ISL_FORMAT_R8G8B8A8_UNORM_SRGB;
- break;
- default:
- break;
- }
-
- /* Note that GL_EXT_texture_integer says that blending doesn't occur for
- * integer, so we don't need hardware support for blending on it. Other
- * than that, GL in general requires alpha blending for render targets,
- * even though we don't support it for some formats.
- */
- if (isl_format_supports_rendering(devinfo, render) &&
- (isl_format_supports_alpha_blending(devinfo, render) || is_integer)) {
- screen->mesa_to_isl_render_format[format] = render;
- screen->mesa_format_supports_render[format] = true;
- }
- }
-
- /* We will check this table for FBO completeness, but the surface format
- * table above only covered color rendering.
- */
- screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_S8_UINT] = true;
- screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_X8_UINT] = true;
- screen->mesa_format_supports_render[MESA_FORMAT_S_UINT8] = true;
- screen->mesa_format_supports_render[MESA_FORMAT_Z_FLOAT32] = true;
- screen->mesa_format_supports_render[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true;
- if (devinfo->ver >= 8)
- screen->mesa_format_supports_render[MESA_FORMAT_Z_UNORM16] = true;
-
- /* We remap depth formats to a supported texturing format in
- * translate_tex_format().
- */
- screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_S8_UINT] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_X8_UINT] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_Z_FLOAT32] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_S_UINT8] = true;
-
- /* Benchmarking shows that Z16 is slower than Z24, so there's no reason to
- * use it unless you're under memory (not memory bandwidth) pressure.
- *
- * Apparently, the GPU's depth scoreboarding works on a 32-bit granularity,
- * which corresponds to one pixel in the depth buffer for Z24 or Z32 formats.
- * However, it corresponds to two pixels with Z16, which means both need to
- * hit the early depth case in order for it to happen.
- *
- * Other speculation is that we may be hitting increased fragment shader
- * execution from GL_LEQUAL/GL_EQUAL depth tests at reduced precision.
- *
- * With the PMA stall workaround in place, Z16 is faster than Z24, as it
- * should be.
- */
- if (devinfo->ver >= 8)
- screen->mesa_format_supports_texture[MESA_FORMAT_Z_UNORM16] = true;
-
- /* The RGBX formats are not renderable. Normally these get mapped
- * internally to RGBA formats when rendering. However on Gfx9+ when this
- * internal override is used fast clears don't work so they are disabled in
- * brw_meta_fast_clear. To avoid this problem we can just pretend not to
- * support RGBX formats at all. This will cause the upper layers of Mesa to
- * pick the RGBA formats instead. This works fine because when it is used
- * as a texture source the swizzle state is programmed to force the alpha
- * channel to 1.0 anyway. We could also do this for all gens except that
- * it's a bit more difficult when the hardware doesn't support texture
- * swizzling. Gens using the blorp have further problems because that
- * doesn't implement this swizzle override. We don't need to do this for
- * BGRX because that actually is supported natively on Gfx8+.
- */
- if (devinfo->ver >= 9) {
- static const mesa_format rgbx_formats[] = {
- MESA_FORMAT_R8G8B8X8_UNORM,
- MESA_FORMAT_R8G8B8X8_SRGB,
- MESA_FORMAT_RGBX_UNORM16,
- MESA_FORMAT_RGBX_FLOAT16,
- MESA_FORMAT_RGBX_FLOAT32
- };
-
- for (int i = 0; i < ARRAY_SIZE(rgbx_formats); i++) {
- screen->mesa_format_supports_texture[rgbx_formats[i]] = false;
- screen->mesa_format_supports_render[rgbx_formats[i]] = false;
- }
- }
-
- /* On hardware that lacks support for ETC1, we map ETC1 to RGBX
- * during glCompressedTexImage2D(). See brw_mipmap_tree::wraps_etc1.
- */
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC1_RGB8] = true;
-
- /* On hardware that lacks support for ETC2, we map ETC2 to a suitable
- * MESA_FORMAT during glCompressedTexImage2D().
- * See brw_mipmap_tree::wraps_etc2.
- */
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGBA8_EAC] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_R11_EAC] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RG11_EAC] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true;
- screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true;
-}
-
-void
-brw_init_surface_formats(struct brw_context *brw)
-{
- struct brw_screen *screen = brw->screen;
- struct gl_context *ctx = &brw->ctx;
-
- brw->mesa_format_supports_render = screen->mesa_format_supports_render;
- brw->mesa_to_isl_render_format = screen->mesa_to_isl_render_format;
-
- STATIC_ASSERT(ARRAY_SIZE(ctx->TextureFormatSupported) ==
- ARRAY_SIZE(screen->mesa_format_supports_texture));
-
- for (unsigned i = 0; i < ARRAY_SIZE(ctx->TextureFormatSupported); ++i) {
- ctx->TextureFormatSupported[i] = screen->mesa_format_supports_texture[i];
- }
-}
-
-bool
-brw_render_target_supported(struct brw_context *brw,
- struct gl_renderbuffer *rb)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- mesa_format format = rb->Format;
-
- /* Many integer formats are promoted to RGBA (like XRGB8888 is), which means
- * we would consider them renderable even though we don't have surface
- * support for their alpha behavior and don't have the blending unit
- * available to fake it like we do for XRGB8888. Force them to being
- * unsupported.
- */
- if (_mesa_is_format_integer_color(format) &&
- rb->_BaseFormat != GL_RGBA &&
- rb->_BaseFormat != GL_RG &&
- rb->_BaseFormat != GL_RED)
- return false;
-
- /* Under some conditions, MSAA is not supported for formats whose width is
- * more than 64 bits.
- */
- if (devinfo->ver < 8 &&
- rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
- /* Gfx6: MSAA on >64 bit formats is unsupported. */
- if (devinfo->ver <= 6)
- return false;
-
- /* Gfx7: 8x MSAA on >64 bit formats is unsupported. */
- if (rb->NumSamples >= 8)
- return false;
- }
-
- return brw->mesa_format_supports_render[format];
-}
-
-enum isl_format
-translate_tex_format(struct brw_context *brw,
- mesa_format mesa_format,
- GLenum srgb_decode)
-{
- struct gl_context *ctx = &brw->ctx;
- if (srgb_decode == GL_SKIP_DECODE_EXT)
- mesa_format = _mesa_get_srgb_format_linear(mesa_format);
-
- switch( mesa_format ) {
-
- case MESA_FORMAT_Z_UNORM16:
- return ISL_FORMAT_R16_UNORM;
-
- case MESA_FORMAT_Z24_UNORM_S8_UINT:
- case MESA_FORMAT_Z24_UNORM_X8_UINT:
- return ISL_FORMAT_R24_UNORM_X8_TYPELESS;
-
- case MESA_FORMAT_Z_FLOAT32:
- return ISL_FORMAT_R32_FLOAT;
-
- case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
- return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;
-
- case MESA_FORMAT_RGBA_FLOAT32:
- /* The value of this ISL surface format is 0, which tricks the
- * assertion below.
- */
- return ISL_FORMAT_R32G32B32A32_FLOAT;
-
- case MESA_FORMAT_RGBA_ASTC_4x4:
- case MESA_FORMAT_RGBA_ASTC_5x4:
- case MESA_FORMAT_RGBA_ASTC_5x5:
- case MESA_FORMAT_RGBA_ASTC_6x5:
- case MESA_FORMAT_RGBA_ASTC_6x6:
- case MESA_FORMAT_RGBA_ASTC_8x5:
- case MESA_FORMAT_RGBA_ASTC_8x6:
- case MESA_FORMAT_RGBA_ASTC_8x8:
- case MESA_FORMAT_RGBA_ASTC_10x5:
- case MESA_FORMAT_RGBA_ASTC_10x6:
- case MESA_FORMAT_RGBA_ASTC_10x8:
- case MESA_FORMAT_RGBA_ASTC_10x10:
- case MESA_FORMAT_RGBA_ASTC_12x10:
- case MESA_FORMAT_RGBA_ASTC_12x12: {
- enum isl_format isl_fmt =
- brw_isl_format_for_mesa_format(mesa_format);
-
- /**
- * It is possible to process these formats using the LDR Profile
- * or the Full Profile mode of the hardware. Because it isn't
- * possible to determine if an HDR or LDR texture is being rendered, we
- * can't determine which mode to enable in the hardware. Therefore, to
- * handle all cases, always default to Full profile unless we are
- * processing sRGBs, which are incompatible with this mode.
- */
- if (ctx->Extensions.KHR_texture_compression_astc_hdr)
- isl_fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;
-
- return isl_fmt;
- }
-
- default:
- return brw_isl_format_for_mesa_format(mesa_format);
- }
-}
-
-/**
- * Convert a MESA_FORMAT to the corresponding BRW_DEPTHFORMAT enum.
- */
-uint32_t
-brw_depth_format(struct brw_context *brw, mesa_format format)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- switch (format) {
- case MESA_FORMAT_Z_UNORM16:
- return BRW_DEPTHFORMAT_D16_UNORM;
- case MESA_FORMAT_Z_FLOAT32:
- return BRW_DEPTHFORMAT_D32_FLOAT;
- case MESA_FORMAT_Z24_UNORM_X8_UINT:
- if (devinfo->ver >= 6) {
- return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
- } else {
- /* Use D24_UNORM_S8, not D24_UNORM_X8.
- *
- * D24_UNORM_X8 was not introduced until Gfx5. (See the Ironlake PRM,
- * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
- * 3DSTATE_DEPTH_BUFFER.Surface_Format).
- *
- * However, on Gfx5, D24_UNORM_X8 may be used only if separate
- * stencil is enabled, and we never enable it. From the Ironlake PRM,
- * same section as above, 3DSTATE_DEPTH_BUFFER's
- * "Separate Stencil Buffer Enable" bit:
- *
- * "If this field is disabled, the Surface Format of the depth
- * buffer cannot be D24_UNORM_X8_UINT."
- */
- return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
- }
- case MESA_FORMAT_Z24_UNORM_S8_UINT:
- return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
- case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
- return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
- default:
- unreachable("Unexpected depth format.");
- }
-}
+++ /dev/null
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-/**
- * \file
- * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
- *
- * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a
- * reference on it. We can then check for completion or wait for completion
- * using the normal buffer object mechanisms. This does mean that if an
- * application is using many sync objects, it will emit small batchbuffers
- * which may end up being a significant overhead. In other tests of removing
- * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
- * performance bottleneck, though.
- */
-
-#include <libsync.h> /* Requires Android or libdrm-2.4.72 */
-
-#include "util/os_file.h"
-#include "util/u_memory.h"
-#include <xf86drm.h>
-
-#include "brw_context.h"
-#include "brw_batch.h"
-#include "mesa/main/externalobjects.h"
-
-struct brw_fence {
- struct brw_context *brw;
-
- enum brw_fence_type {
- /** The fence waits for completion of brw_fence::batch_bo. */
- BRW_FENCE_TYPE_BO_WAIT,
-
- /** The fence waits for brw_fence::sync_fd to signal. */
- BRW_FENCE_TYPE_SYNC_FD,
- } type;
-
- union {
- struct brw_bo *batch_bo;
-
- /* This struct owns the fd. */
- int sync_fd;
- };
-
- mtx_t mutex;
- bool signalled;
-};
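
brw_fence is a small tagged union: the type member says which union arm is live, and every operation switches on it before touching batch_bo or sync_fd. A minimal sketch of the same pattern (names are invented; the anonymous union needs C11):

#include <assert.h>
#include <stddef.h>

struct toy_fence {
   enum { TOY_BO_WAIT, TOY_SYNC_FD } type;
   union {
      void *batch_bo;   /* stands in for struct brw_bo * */
      int sync_fd;
   };
};

static void toy_fence_finish(struct toy_fence *f)
{
   switch (f->type) {
   case TOY_BO_WAIT: f->batch_bo = NULL; break;   /* would unreference the BO */
   case TOY_SYNC_FD: f->sync_fd = -1;    break;   /* would close() the fd */
   }
}

int main(void)
{
   struct toy_fence f = { .type = TOY_SYNC_FD, .sync_fd = 7 };
   toy_fence_finish(&f);
   assert(f.sync_fd == -1);
   return 0;
}
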
-
-struct brw_gl_sync {
- struct gl_sync_object gl;
- struct brw_fence fence;
-};
-
-struct intel_semaphore_object {
- struct gl_semaphore_object Base;
- struct drm_syncobj_handle *syncobj;
-};
-
-static inline struct intel_semaphore_object *
-intel_semaphore_object(struct gl_semaphore_object *sem_obj) {
- return (struct intel_semaphore_object*) sem_obj;
-}
-
-static struct gl_semaphore_object *
-intel_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
-{
- struct intel_semaphore_object *is_obj = CALLOC_STRUCT(intel_semaphore_object);
- if (!is_obj)
- return NULL;
-
- _mesa_initialize_semaphore_object(ctx, &is_obj->Base, name);
- return &is_obj->Base;
-}
-
-static void
-intel_semaphoreobj_free(struct gl_context *ctx,
- struct gl_semaphore_object *semObj)
-{
- _mesa_delete_semaphore_object(ctx, semObj);
-}
-
-static void
-intel_semaphoreobj_import(struct gl_context *ctx,
- struct gl_semaphore_object *semObj,
- int fd)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_screen *screen = brw->screen;
- struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
- iSemObj->syncobj = CALLOC_STRUCT(drm_syncobj_handle);
- iSemObj->syncobj->fd = fd;
-
- if (drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, iSemObj->syncobj) < 0) {
- fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
- strerror(errno));
- free(iSemObj->syncobj);
- }
-}
-
-static void
-intel_semaphoreobj_signal(struct gl_context *ctx,
- struct gl_semaphore_object *semObj,
- GLuint numBufferBarriers,
- struct gl_buffer_object **bufObjs,
- GLuint numTextureBarriers,
- struct gl_texture_object **texObjs,
- const GLenum *dstLayouts)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
- struct drm_i915_gem_exec_fence *fence =
- util_dynarray_grow(&brw->batch.exec_fences, struct drm_i915_gem_exec_fence, 1);
- fence->flags = I915_EXEC_FENCE_SIGNAL;
- fence->handle = iSemObj->syncobj->handle;
- brw->batch.contains_fence_signal = true;
-}
-
-static void
-intel_semaphoreobj_wait(struct gl_context *ctx,
- struct gl_semaphore_object *semObj,
- GLuint numBufferBarriers,
- struct gl_buffer_object **bufObjs,
- GLuint numTextureBarriers,
- struct gl_texture_object **texObjs,
- const GLenum *srcLayouts)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_screen *screen = brw->screen;
- struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
- struct drm_syncobj_wait args = {
- .handles = (uintptr_t)&iSemObj->syncobj->handle,
- .count_handles = 1,
- };
-
- drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
-}
-
-static void
-brw_fence_init(struct brw_context *brw, struct brw_fence *fence,
- enum brw_fence_type type)
-{
- fence->brw = brw;
- fence->type = type;
- mtx_init(&fence->mutex, mtx_plain);
-
- switch (type) {
- case BRW_FENCE_TYPE_BO_WAIT:
- fence->batch_bo = NULL;
- break;
- case BRW_FENCE_TYPE_SYNC_FD:
- fence->sync_fd = -1;
- break;
- }
-}
-
-static void
-brw_fence_finish(struct brw_fence *fence)
-{
- switch (fence->type) {
- case BRW_FENCE_TYPE_BO_WAIT:
- if (fence->batch_bo)
- brw_bo_unreference(fence->batch_bo);
- break;
- case BRW_FENCE_TYPE_SYNC_FD:
- if (fence->sync_fd != -1)
- close(fence->sync_fd);
- break;
- }
-
- mtx_destroy(&fence->mutex);
-}
-
-static bool MUST_CHECK
-brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
-{
- __DRIcontext *driContext = brw->driContext;
- __DRIdrawable *driDrawable = driContext->driDrawablePriv;
-
- /*
- * From KHR_fence_sync:
- *
- * When the condition of the sync object is satisfied by the fence
- * command, the sync is signaled by the associated client API context,
- * causing any eglClientWaitSyncKHR commands (see below) blocking on
- * <sync> to unblock. The only condition currently supported is
- * EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
- * completion of the fence command corresponding to the sync object,
- * and all preceding commands in the associated client API context's
- * command stream. The sync object will not be signaled until all
- * effects from these commands on the client API's internal and
- * framebuffer state are fully realized. No other state is affected by
- * execution of the fence command.
- *
- * Note the emphasis there on ensuring that the framebuffer is fully
- * realised before the fence is signaled. We cannot just flush the batch,
- * but must also resolve the drawable first. The importance of this is,
- * for example, in creating a fence for a frame to be passed to a
- * remote compositor. Without us flushing the drawable explicitly, the
- * resolve will be in a following batch (when the client finally calls
- * SwapBuffers, or triggers a resolve via some other path) and so the
- * compositor may read the incomplete framebuffer instead.
- */
- if (driDrawable)
- brw_resolve_for_dri2_flush(brw, driDrawable);
- brw_emit_mi_flush(brw);
-
- switch (fence->type) {
- case BRW_FENCE_TYPE_BO_WAIT:
- assert(!fence->batch_bo);
- assert(!fence->signalled);
-
- fence->batch_bo = brw->batch.batch.bo;
- brw_bo_reference(fence->batch_bo);
-
- if (brw_batch_flush(brw) < 0) {
- brw_bo_unreference(fence->batch_bo);
- fence->batch_bo = NULL;
- return false;
- }
- break;
- case BRW_FENCE_TYPE_SYNC_FD:
- assert(!fence->signalled);
-
- if (fence->sync_fd == -1) {
- /* Create an out-fence that signals after all pending commands
- * complete.
- */
- if (brw_batch_flush_fence(brw, -1, &fence->sync_fd) < 0)
- return false;
- assert(fence->sync_fd != -1);
- } else {
- /* Wait on the in-fence before executing any subsequently submitted
- * commands.
- */
- if (brw_batch_flush(brw) < 0)
- return false;
-
- /* Emit a dummy batch just for the fence. */
- brw_emit_mi_flush(brw);
- if (brw_batch_flush_fence(brw, fence->sync_fd, NULL) < 0)
- return false;
- }
- break;
- }
-
- return true;
-}
-
-static bool MUST_CHECK
-brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
-{
- bool ret;
-
- mtx_lock(&fence->mutex);
- ret = brw_fence_insert_locked(brw, fence);
- mtx_unlock(&fence->mutex);
-
- return ret;
-}
-
-static bool
-brw_fence_has_completed_locked(struct brw_fence *fence)
-{
- if (fence->signalled)
- return true;
-
- switch (fence->type) {
- case BRW_FENCE_TYPE_BO_WAIT:
- if (!fence->batch_bo) {
- /* There may be no batch if brw_batch_flush() failed. */
- return false;
- }
-
- if (brw_bo_busy(fence->batch_bo))
- return false;
-
- brw_bo_unreference(fence->batch_bo);
- fence->batch_bo = NULL;
- fence->signalled = true;
-
- return true;
-
- case BRW_FENCE_TYPE_SYNC_FD:
- assert(fence->sync_fd != -1);
-
- if (sync_wait(fence->sync_fd, 0) == -1)
- return false;
-
- fence->signalled = true;
-
- return true;
- }
-
- return false;
-}
-
-static bool
-brw_fence_has_completed(struct brw_fence *fence)
-{
- bool ret;
-
- mtx_lock(&fence->mutex);
- ret = brw_fence_has_completed_locked(fence);
- mtx_unlock(&fence->mutex);
-
- return ret;
-}
-
-static bool
-brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
- uint64_t timeout)
-{
- int32_t timeout_i32;
-
- if (fence->signalled)
- return true;
-
- switch (fence->type) {
- case BRW_FENCE_TYPE_BO_WAIT:
- if (!fence->batch_bo) {
- /* There may be no batch if brw_batch_flush() failed. */
- return false;
- }
-
- /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
- * immediately for timeouts <= 0. The best we can do is to clamp the
- * timeout to INT64_MAX. This limits the maximum timeout from 584 years to
- * 292 years - likely not a big deal.
- */
- if (timeout > INT64_MAX)
- timeout = INT64_MAX;
-
- if (brw_bo_wait(fence->batch_bo, timeout) != 0)
- return false;
-
- fence->signalled = true;
- brw_bo_unreference(fence->batch_bo);
- fence->batch_bo = NULL;
-
- return true;
- case BRW_FENCE_TYPE_SYNC_FD:
- if (fence->sync_fd == -1)
- return false;
-
- if (timeout > INT32_MAX)
- timeout_i32 = -1;
- else
- timeout_i32 = timeout;
-
- if (sync_wait(fence->sync_fd, timeout_i32) == -1)
- return false;
-
- fence->signalled = true;
- return true;
- }
-
- assert(!"bad enum brw_fence_type");
- return false;
-}
-
-/**
- * Return true if the fence successfully signals or has already signalled.
- * (This matches the behavior expected from __DRI2fence::client_wait_sync).
- */
-static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
- uint64_t timeout)
-{
- bool ret;
-
- mtx_lock(&fence->mutex);
- ret = brw_fence_client_wait_locked(brw, fence, timeout);
- mtx_unlock(&fence->mutex);
-
- return ret;
-}
-
-static void
-brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
-{
- switch (fence->type) {
- case BRW_FENCE_TYPE_BO_WAIT:
- /* We have nothing to do for WaitSync. Our GL command stream is sequential,
- * so given that the sync object has already flushed the batchbuffer, any
- * batchbuffers coming after this waitsync will naturally not occur until
- * the previous one is done.
- */
- break;
- case BRW_FENCE_TYPE_SYNC_FD:
- assert(fence->sync_fd != -1);
-
- /* The user wants explicit synchronization, so give them what they want. */
- if (!brw_fence_insert(brw, fence)) {
- /* FIXME: There exists no way yet to report an error here. If an error
- * occurs, continue silently and hope for the best.
- */
- }
- break;
- }
-}
-
-static struct gl_sync_object *
-brw_gl_new_sync(struct gl_context *ctx)
-{
- struct brw_gl_sync *sync;
-
- sync = calloc(1, sizeof(*sync));
- if (!sync)
- return NULL;
-
- return &sync->gl;
-}
-
-static void
-brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
-{
- struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
- brw_fence_finish(&sync->fence);
- free(sync->gl.Label);
- free(sync);
-}
-
-static void
-brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
- GLenum condition, GLbitfield flags)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
- /* brw_fence_insert_locked() assumes it must do a complete flush */
- assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
-
- brw_fence_init(brw, &sync->fence, BRW_FENCE_TYPE_BO_WAIT);
-
- if (!brw_fence_insert_locked(brw, &sync->fence)) {
- /* FIXME: There exists no way to report a GL error here. If an error
- * occurs, continue silently and hope for the best.
- */
- }
-}
-
-static void
-brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
- GLbitfield flags, GLuint64 timeout)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
- if (brw_fence_client_wait(brw, &sync->fence, timeout))
- sync->gl.StatusFlag = 1;
-}
-
-static void
-brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
- GLbitfield flags, GLuint64 timeout)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
- brw_fence_server_wait(brw, &sync->fence);
-}
-
-static void
-brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
-{
- struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
- if (brw_fence_has_completed(&sync->fence))
- sync->gl.StatusFlag = 1;
-}
-
-void
-brw_init_syncobj_functions(struct dd_function_table *functions)
-{
- functions->NewSyncObject = brw_gl_new_sync;
- functions->DeleteSyncObject = brw_gl_delete_sync;
- functions->FenceSync = brw_gl_fence_sync;
- functions->CheckSync = brw_gl_check_sync;
- functions->ClientWaitSync = brw_gl_client_wait_sync;
- functions->ServerWaitSync = brw_gl_server_wait_sync;
- functions->NewSemaphoreObject = intel_semaphoreobj_alloc;
- functions->DeleteSemaphoreObject = intel_semaphoreobj_free;
- functions->ImportSemaphoreFd = intel_semaphoreobj_import;
- functions->ServerSignalSemaphoreObject = intel_semaphoreobj_signal;
- functions->ServerWaitSemaphoreObject = intel_semaphoreobj_wait;
-}
-
-static void *
-brw_dri_create_fence(__DRIcontext *ctx)
-{
- struct brw_context *brw = ctx->driverPrivate;
- struct brw_fence *fence;
-
- fence = calloc(1, sizeof(*fence));
- if (!fence)
- return NULL;
-
- brw_fence_init(brw, fence, BRW_FENCE_TYPE_BO_WAIT);
-
- if (!brw_fence_insert_locked(brw, fence)) {
- brw_fence_finish(fence);
- free(fence);
- return NULL;
- }
-
- return fence;
-}
-
-static void
-brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
-{
- struct brw_fence *fence = _fence;
-
- brw_fence_finish(fence);
- free(fence);
-}
-
-static GLboolean
-brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
- uint64_t timeout)
-{
- struct brw_fence *fence = _fence;
-
- return brw_fence_client_wait(fence->brw, fence, timeout);
-}
-
-static void
-brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
-{
- struct brw_fence *fence = _fence;
-
- /* We might be called here with a NULL fence as a result of WaitSyncKHR
- * on an EGL_KHR_reusable_sync fence. There is nothing to do here in that case.
- */
- if (!fence)
- return;
-
- brw_fence_server_wait(fence->brw, fence);
-}
-
-static unsigned
-brw_dri_get_capabilities(__DRIscreen *dri_screen)
-{
- struct brw_screen *screen = dri_screen->driverPrivate;
- unsigned caps = 0;
-
- if (screen->has_exec_fence)
- caps |= __DRI_FENCE_CAP_NATIVE_FD;
-
- return caps;
-}
-
-static void *
-brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
-{
- struct brw_context *brw = dri_ctx->driverPrivate;
- struct brw_fence *fence;
-
- assert(brw->screen->has_exec_fence);
-
- fence = calloc(1, sizeof(*fence));
- if (!fence)
- return NULL;
-
- brw_fence_init(brw, fence, BRW_FENCE_TYPE_SYNC_FD);
-
- if (fd == -1) {
- /* Create an out-fence fd */
- if (!brw_fence_insert_locked(brw, fence))
- goto fail;
- } else {
- /* Import the sync fd as an in-fence. */
- fence->sync_fd = os_dupfd_cloexec(fd);
- }
-
- assert(fence->sync_fd != -1);
-
- return fence;
-
-fail:
- brw_fence_finish(fence);
- free(fence);
- return NULL;
-}
-
-static int
-brw_dri_get_fence_fd_locked(struct brw_fence *fence)
-{
- assert(fence->type == BRW_FENCE_TYPE_SYNC_FD);
- return os_dupfd_cloexec(fence->sync_fd);
-}
-
-static int
-brw_dri_get_fence_fd(__DRIscreen *dri_screen, void *_fence)
-{
- struct brw_fence *fence = _fence;
- int fd;
-
- mtx_lock(&fence->mutex);
- fd = brw_dri_get_fence_fd_locked(fence);
- mtx_unlock(&fence->mutex);
-
- return fd;
-}
-
-const __DRI2fenceExtension brwFenceExtension = {
- .base = { __DRI2_FENCE, 2 },
-
- .create_fence = brw_dri_create_fence,
- .destroy_fence = brw_dri_destroy_fence,
- .client_wait_sync = brw_dri_client_wait_sync,
- .server_wait_sync = brw_dri_server_wait_sync,
- .get_fence_from_cl_event = NULL,
- .get_capabilities = brw_dri_get_capabilities,
- .create_fence_fd = brw_dri_create_fence_fd,
- .get_fence_fd = brw_dri_get_fence_fd,
-};
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_tcs.c
- *
- * Tessellation control shader state upload code.
- */
-
-#include "brw_context.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "brw_state.h"
-#include "program/prog_parameter.h"
-#include "nir_builder.h"
-
-static bool
-brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
- struct brw_program *tep, struct brw_tcs_prog_key *key)
-{
- struct gl_context *ctx = &brw->ctx;
- const struct brw_compiler *compiler = brw->screen->compiler;
- const struct intel_device_info *devinfo = compiler->devinfo;
- struct brw_stage_state *stage_state = &brw->tcs.base;
- nir_shader *nir;
- struct brw_tcs_prog_data prog_data;
- bool start_busy = false;
- double start_time = 0;
-
- void *mem_ctx = ralloc_context(NULL);
- if (tcp) {
- nir = nir_shader_clone(mem_ctx, tcp->program.nir);
- } else {
- const nir_shader_compiler_options *options =
- ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
- nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);
- }
-
- memset(&prog_data, 0, sizeof(prog_data));
-
- if (tcp) {
- brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
- &prog_data.base.base, 0);
-
- brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program,
- &prog_data.base.base,
- compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
- if (brw->can_push_ubos) {
- brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
- prog_data.base.base.ubo_ranges);
- }
- } else {
- /* Upload the Patch URB Header as the first two uniforms.
- * Do the annoying scrambling so the shader doesn't have to.
- */
- assert(nir->num_uniforms == 32);
- prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8);
- prog_data.base.base.nr_params = 8;
-
- uint32_t *param = prog_data.base.base.param;
- for (int i = 0; i < 8; i++)
- param[i] = BRW_PARAM_BUILTIN_ZERO;
-
- if (key->tes_primitive_mode == GL_QUADS) {
- for (int i = 0; i < 4; i++)
- param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
-
- param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
- param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
- } else if (key->tes_primitive_mode == GL_TRIANGLES) {
- for (int i = 0; i < 3; i++)
- param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
-
- param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
- } else {
- assert(key->tes_primitive_mode == GL_ISOLINES);
- param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
- param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
- }
- }
-
- int st_index = -1;
- if (INTEL_DEBUG(DEBUG_SHADER_TIME) && tep)
- st_index = brw_get_shader_time_index(brw, &tep->program, ST_TCS, true);
-
- if (unlikely(brw->perf_debug)) {
- start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
- start_time = get_time();
- }
-
- char *error_str;
- const unsigned *program =
- brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index,
- NULL, &error_str);
- if (program == NULL) {
- if (tep) {
- tep->program.sh.data->LinkStatus = LINKING_FAILURE;
- ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);
- }
-
- _mesa_problem(NULL, "Failed to compile tessellation control shader: "
- "%s\n", error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- if (unlikely(brw->perf_debug)) {
- if (tcp) {
- if (tcp->compiled_once) {
- brw_debug_recompile(brw, MESA_SHADER_TESS_CTRL, tcp->program.Id,
- &key->base);
- }
- tcp->compiled_once = true;
- }
-
- if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
- perf_debug("TCS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- }
-
- /* Scratch space is used for register spilling */
- brw_alloc_stage_scratch(brw, stage_state,
- prog_data.base.base.total_scratch);
-
- /* The param and pull_param arrays will be freed by the shader cache. */
- ralloc_steal(NULL, prog_data.base.base.param);
- ralloc_steal(NULL, prog_data.base.base.pull_param);
- brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
- key, sizeof(*key),
- program, prog_data.base.base.program_size,
- &prog_data, sizeof(prog_data),
- &stage_state->prog_offset, &brw->tcs.base.prog_data);
- ralloc_free(mem_ctx);
-
- return true;
-}
-
-void
-brw_tcs_populate_key(struct brw_context *brw,
- struct brw_tcs_prog_key *key)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct brw_compiler *compiler = brw->screen->compiler;
- struct brw_program *tcp =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
- struct brw_program *tep =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
- struct gl_program *tes_prog = &tep->program;
-
- uint64_t per_vertex_slots = tes_prog->info.inputs_read;
- uint32_t per_patch_slots = tes_prog->info.patch_inputs_read;
-
- memset(key, 0, sizeof(*key));
-
- if (tcp) {
- struct gl_program *prog = &tcp->program;
- per_vertex_slots |= prog->info.outputs_written;
- per_patch_slots |= prog->info.patch_outputs_written;
- }
-
- if (devinfo->ver < 8 || !tcp || compiler->use_tcs_8_patch)
- key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices;
- key->outputs_written = per_vertex_slots;
- key->patch_outputs_written = per_patch_slots;
-
- /* We need to specialize our code generation for tessellation levels
- * based on the domain the DS is expecting to tessellate.
- */
- key->tes_primitive_mode = tep->program.info.tess.primitive_mode;
- key->quads_workaround = devinfo->ver < 9 &&
- tep->program.info.tess.primitive_mode == GL_QUADS &&
- tep->program.info.tess.spacing == TESS_SPACING_EQUAL;
-
- if (tcp) {
- /* _NEW_TEXTURE */
- brw_populate_base_prog_key(&brw->ctx, tcp, &key->base);
- }
-}
-
-void
-brw_upload_tcs_prog(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->tcs.base;
- struct brw_tcs_prog_key key;
- /* BRW_NEW_TESS_PROGRAMS */
- struct brw_program *tcp =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
- ASSERTED struct brw_program *tep =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
- assert(tep);
-
- if (!brw_state_dirty(brw,
- _NEW_TEXTURE,
- BRW_NEW_PATCH_PRIMITIVE |
- BRW_NEW_TESS_PROGRAMS))
- return;
-
- brw_tcs_populate_key(brw, &key);
-
- if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG, &key, sizeof(key),
- &stage_state->prog_offset, &brw->tcs.base.prog_data,
- true))
- return;
-
- if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL))
- return;
-
- tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
- if (tcp)
- tcp->id = key.base.program_string_id;
-
- ASSERTED bool success = brw_codegen_tcs_prog(brw, tcp, tep, &key);
- assert(success);
-}
-
-void
-brw_tcs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_tcs_prog_key *key,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog)
-{
- const struct intel_device_info *devinfo = compiler->devinfo;
- struct brw_program *btcp = brw_program(prog);
- const struct gl_linked_shader *tes =
- sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
-
- memset(key, 0, sizeof(*key));
-
- brw_populate_default_base_prog_key(devinfo, btcp, &key->base);
-
- /* Guess that the input and output patches have the same dimensionality. */
- if (devinfo->ver < 8 || compiler->use_tcs_8_patch)
- key->input_vertices = prog->info.tess.tcs_vertices_out;
-
- if (tes) {
- key->tes_primitive_mode = tes->Program->info.tess.primitive_mode;
- key->quads_workaround = devinfo->ver < 9 &&
- tes->Program->info.tess.primitive_mode == GL_QUADS &&
- tes->Program->info.tess.spacing == TESS_SPACING_EQUAL;
- } else {
- key->tes_primitive_mode = GL_TRIANGLES;
- }
-
- key->outputs_written = prog->nir->info.outputs_written;
- key->patch_outputs_written = prog->nir->info.patch_outputs_written;
-}
-
-bool
-brw_tcs_precompile(struct gl_context *ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct brw_compiler *compiler = brw->screen->compiler;
- struct brw_tcs_prog_key key;
- uint32_t old_prog_offset = brw->tcs.base.prog_offset;
- struct brw_stage_prog_data *old_prog_data = brw->tcs.base.prog_data;
- bool success;
-
- struct brw_program *btcp = brw_program(prog);
- const struct gl_linked_shader *tes =
- shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
- struct brw_program *btep = tes ? brw_program(tes->Program) : NULL;
-
- brw_tcs_populate_default_key(compiler, &key, shader_prog, prog);
-
- success = brw_codegen_tcs_prog(brw, btcp, btep, &key);
-
- brw->tcs.base.prog_offset = old_prog_offset;
- brw->tcs.base.prog_data = old_prog_data;
-
- return success;
-}
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-
-
-/* Creates a new TCS constant buffer reflecting the current TCS program's
- * constants, if needed by the TCS program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_tcs_pull_constants(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->tcs.base;
-
- /* BRW_NEW_TESS_PROGRAMS */
- struct brw_program *tcp =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
-
- if (!tcp)
- return;
-
- /* BRW_NEW_TCS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
- /* _NEW_PROGRAM_CONSTANTS */
- brw_upload_pull_constants(brw, BRW_NEW_TCS_CONSTBUF, &tcp->program,
- stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_tcs_pull_constants = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = brw_upload_tcs_pull_constants,
-};
-
-static void
-brw_upload_tcs_ubo_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_PROGRAM */
- struct gl_program *prog =
- ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
-
- /* BRW_NEW_TCS_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
-
- brw_upload_ubo_surfaces(brw, prog, &brw->tcs.base, prog_data);
-}
-
-const struct brw_tracked_state brw_tcs_ubo_surfaces = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_UNIFORM_BUFFER,
- },
- .emit = brw_upload_tcs_ubo_surfaces,
-};
-
-static void
-brw_upload_tcs_image_surfaces(struct brw_context *brw)
-{
- /* BRW_NEW_TESS_PROGRAMS */
- const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
-
- if (tcp) {
- /* BRW_NEW_TCS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
- brw_upload_image_surfaces(brw, tcp, &brw->tcs.base,
- brw->tcs.base.prog_data);
- }
-}
-
-const struct brw_tracked_state brw_tcs_image_surfaces = {
- .dirty = {
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_IMAGE_UNITS |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = brw_upload_tcs_image_surfaces,
-};
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_tes.c
- *
- * Tessellation evaluation shader state upload code.
- */
-
-#include "brw_context.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "brw_state.h"
-#include "program/prog_parameter.h"
-
-static bool
-brw_codegen_tes_prog(struct brw_context *brw,
- struct brw_program *tep,
- struct brw_tes_prog_key *key)
-{
- const struct brw_compiler *compiler = brw->screen->compiler;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_stage_state *stage_state = &brw->tes.base;
- struct brw_tes_prog_data prog_data;
- bool start_busy = false;
- double start_time = 0;
-
- memset(&prog_data, 0, sizeof(prog_data));
-
- void *mem_ctx = ralloc_context(NULL);
-
- nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir);
-
- brw_assign_common_binding_table_offsets(devinfo, &tep->program,
- &prog_data.base.base, 0);
-
- brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program,
- &prog_data.base.base,
- compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
- if (brw->can_push_ubos) {
- brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
- prog_data.base.base.ubo_ranges);
- }
-
- int st_index = -1;
- if (INTEL_DEBUG(DEBUG_SHADER_TIME))
- st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true);
-
- if (unlikely(brw->perf_debug)) {
- start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
- start_time = get_time();
- }
-
- struct brw_vue_map input_vue_map;
- brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
- key->patch_inputs_read);
-
- char *error_str;
- const unsigned *program =
- brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data,
- nir, st_index, NULL, &error_str);
- if (program == NULL) {
- tep->program.sh.data->LinkStatus = LINKING_FAILURE;
- ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);
-
- _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: "
- "%s\n", error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- if (unlikely(brw->perf_debug)) {
- if (tep->compiled_once) {
- brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id,
- &key->base);
- }
- if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
- perf_debug("TES compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- tep->compiled_once = true;
- }
-
- /* Scratch space is used for register spilling */
- brw_alloc_stage_scratch(brw, stage_state,
- prog_data.base.base.total_scratch);
-
- /* The param and pull_param arrays will be freed by the shader cache. */
- ralloc_steal(NULL, prog_data.base.base.param);
- ralloc_steal(NULL, prog_data.base.base.pull_param);
- brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
- key, sizeof(*key),
- program, prog_data.base.base.program_size,
- &prog_data, sizeof(prog_data),
- &stage_state->prog_offset, &brw->tes.base.prog_data);
- ralloc_free(mem_ctx);
-
- return true;
-}
-
-void
-brw_tes_populate_key(struct brw_context *brw,
- struct brw_tes_prog_key *key)
-{
- struct brw_program *tcp =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
- struct brw_program *tep =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
- struct gl_program *prog = &tep->program;
-
- uint64_t per_vertex_slots = prog->info.inputs_read;
- uint32_t per_patch_slots = prog->info.patch_inputs_read;
-
- memset(key, 0, sizeof(*key));
-
- /* _NEW_TEXTURE */
- brw_populate_base_prog_key(&brw->ctx, tep, &key->base);
-
- /* The TCS may have additional outputs which aren't read by the
- * TES (possibly for cross-thread communication). These need to
- * be stored in the Patch URB Entry as well.
- */
- if (tcp) {
- struct gl_program *tcp_prog = &tcp->program;
- per_vertex_slots |= tcp_prog->info.outputs_written &
- ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
- per_patch_slots |= tcp_prog->info.patch_outputs_written;
- }
-
- key->inputs_read = per_vertex_slots;
- key->patch_inputs_read = per_patch_slots;
-}
-
-void
-brw_upload_tes_prog(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->tes.base;
- struct brw_tes_prog_key key;
- /* BRW_NEW_TESS_PROGRAMS */
- struct brw_program *tep =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
-
- if (!brw_state_dirty(brw,
- _NEW_TEXTURE,
- BRW_NEW_TESS_PROGRAMS))
- return;
-
- brw_tes_populate_key(brw, &key);
-
- if (brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, &key, sizeof(key),
- &stage_state->prog_offset, &brw->tes.base.prog_data,
- true))
- return;
-
- if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_EVAL))
- return;
-
- tep = (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
- tep->id = key.base.program_string_id;
-
- ASSERTED bool success = brw_codegen_tes_prog(brw, tep, &key);
- assert(success);
-}
-
-void
-brw_tes_populate_default_key(const struct brw_compiler *compiler,
- struct brw_tes_prog_key *key,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog)
-{
- const struct intel_device_info *devinfo = compiler->devinfo;
- struct brw_program *btep = brw_program(prog);
-
- memset(key, 0, sizeof(*key));
-
- brw_populate_default_base_prog_key(devinfo, btep, &key->base);
-
- key->inputs_read = prog->nir->info.inputs_read;
- key->patch_inputs_read = prog->nir->info.patch_inputs_read;
-
- if (sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
- struct gl_program *tcp =
- sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
- key->inputs_read |= tcp->nir->info.outputs_written &
- ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
- key->patch_inputs_read |= tcp->nir->info.patch_outputs_written;
- }
-}
-
-bool
-brw_tes_precompile(struct gl_context *ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct brw_compiler *compiler = brw->screen->compiler;
- struct brw_tes_prog_key key;
- uint32_t old_prog_offset = brw->tes.base.prog_offset;
- struct brw_stage_prog_data *old_prog_data = brw->tes.base.prog_data;
- bool success;
-
- struct brw_program *btep = brw_program(prog);
-
- brw_tes_populate_default_key(compiler, &key, shader_prog, prog);
-
- success = brw_codegen_tes_prog(brw, btep, &key);
-
- brw->tes.base.prog_offset = old_prog_offset;
- brw->tes.base.prog_data = old_prog_data;
-
- return success;
-}
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-
-
-/* Creates a new TES constant buffer reflecting the current TES program's
- * constants, if needed by the TES program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_tes_pull_constants(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->tes.base;
-
- /* BRW_NEW_TESS_PROGRAMS */
- struct brw_program *dp =
- (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
-
- if (!dp)
- return;
-
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
- /* _NEW_PROGRAM_CONSTANTS */
- brw_upload_pull_constants(brw, BRW_NEW_TES_CONSTBUF, &dp->program,
- stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_tes_pull_constants = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = brw_upload_tes_pull_constants,
-};
-
-static void
-brw_upload_tes_ubo_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_PROGRAM */
- struct gl_program *prog =
- ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
-
- /* BRW_NEW_TES_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
-
- brw_upload_ubo_surfaces(brw, prog, &brw->tes.base, prog_data);
-}
-
-const struct brw_tracked_state brw_tes_ubo_surfaces = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_UNIFORM_BUFFER,
- },
- .emit = brw_upload_tes_ubo_surfaces,
-};
-
-static void
-brw_upload_tes_image_surfaces(struct brw_context *brw)
-{
- /* BRW_NEW_TESS_PROGRAMS */
- const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
-
- if (tep) {
- /* BRW_NEW_TES_PROG_DATA, BRW_NEW_IMAGE_UNITS */
- brw_upload_image_surfaces(brw, tep, &brw->tes.base,
- brw->tes.base.prog_data);
- }
-}
-
-const struct brw_tracked_state brw_tes_image_surfaces = {
- .dirty = {
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_IMAGE_UNITS |
- BRW_NEW_TESS_PROGRAMS |
- BRW_NEW_TES_PROG_DATA,
- },
- .emit = brw_upload_tes_image_surfaces,
-};
+++ /dev/null
-#include "swrast/swrast.h"
-#include "main/renderbuffer.h"
-#include "main/texobj.h"
-#include "main/teximage.h"
-#include "main/mipmap.h"
-#include "drivers/common/meta.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_buffer_objects.h"
-#include "brw_mipmap_tree.h"
-#include "brw_tex.h"
-#include "brw_fbo.h"
-#include "brw_state.h"
-#include "util/u_memory.h"
-
-#define FILE_DEBUG_FLAG DEBUG_TEXTURE
-
-static struct gl_texture_image *
-brw_new_texture_image(struct gl_context *ctx)
-{
- DBG("%s\n", __func__);
- (void) ctx;
- return (struct gl_texture_image *) CALLOC_STRUCT(brw_texture_image);
-}
-
-static void
-brw_delete_texture_image(struct gl_context *ctx, struct gl_texture_image *img)
-{
- /* nothing special (yet) for brw_texture_image */
- _mesa_delete_texture_image(ctx, img);
-}
-
-
-static struct gl_texture_object *
-brw_new_texture_object(struct gl_context *ctx, GLuint name, GLenum target)
-{
- struct brw_texture_object *obj = CALLOC_STRUCT(brw_texture_object);
-
- (void) ctx;
-
- DBG("%s\n", __func__);
-
- if (obj == NULL)
- return NULL;
-
- _mesa_initialize_texture_object(ctx, &obj->base, name, target);
-
- obj->needs_validate = true;
-
- return &obj->base;
-}
-
-static void
-brw_delete_texture_object(struct gl_context *ctx,
- struct gl_texture_object *texObj)
-{
- struct brw_texture_object *brw_obj = brw_texture_object(texObj);
-
- brw_miptree_release(&brw_obj->mt);
- _mesa_delete_texture_object(ctx, texObj);
-}
-
-static GLboolean
-brw_alloc_texture_image_buffer(struct gl_context *ctx,
- struct gl_texture_image *image)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_texture_image *intel_image = brw_texture_image(image);
- struct gl_texture_object *texobj = image->TexObject;
- struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
-
- assert(image->Border == 0);
-
- /* Quantize sample count */
- if (image->NumSamples) {
- image->NumSamples = brw_quantize_num_samples(brw->screen, image->NumSamples);
- if (!image->NumSamples)
- return false;
- }
-
- /* Because the driver uses AllocTextureImageBuffer() internally, it may end
- * up mismatched with FreeTextureImageBuffer(), but that is safe to call
- * multiple times.
- */
- ctx->Driver.FreeTextureImageBuffer(ctx, image);
-
- if (!_swrast_init_texture_image(image))
- return false;
-
- if (intel_texobj->mt &&
- brw_miptree_match_image(intel_texobj->mt, image)) {
- brw_miptree_reference(&intel_image->mt, intel_texobj->mt);
- DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
- __func__, texobj, image->Level,
- image->Width, image->Height, image->Depth, intel_texobj->mt);
- } else {
- intel_image->mt = brw_miptree_create_for_teximage(brw, intel_texobj,
- intel_image,
- MIPTREE_CREATE_DEFAULT);
- if (!intel_image->mt)
- return false;
-
- /* Even if the object currently has a mipmap tree associated
- * with it, this one is a more likely candidate to represent the
- * whole object since our level didn't fit what was there
- * before, and any lower levels would fit into our miptree.
- */
- brw_miptree_reference(&intel_texobj->mt, intel_image->mt);
-
- DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
- __func__, texobj, image->Level,
- image->Width, image->Height, image->Depth, intel_image->mt);
- }
-
- intel_texobj->needs_validate = true;
-
- return true;
-}
-
-/**
- * ctx->Driver.AllocTextureStorage() handler.
- *
- * Compare this to _mesa_AllocTextureStorage_sw, which would call into
- * brw_alloc_texture_image_buffer() above.
- */
-static GLboolean
-brw_alloc_texture_storage(struct gl_context *ctx,
- struct gl_texture_object *texobj,
- GLsizei levels, GLsizei width,
- GLsizei height, GLsizei depth)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
- struct gl_texture_image *first_image = texobj->Image[0][0];
- int num_samples = brw_quantize_num_samples(brw->screen,
- first_image->NumSamples);
- const int numFaces = _mesa_num_tex_faces(texobj->Target);
- int face;
- int level;
-
- /* If the object's current miptree doesn't match what we need, make a new
- * one.
- */
- if (!intel_texobj->mt ||
- !brw_miptree_match_image(intel_texobj->mt, first_image) ||
- intel_texobj->mt->last_level != levels - 1) {
- brw_miptree_release(&intel_texobj->mt);
-
- brw_get_image_dims(first_image, &width, &height, &depth);
- intel_texobj->mt = brw_miptree_create(brw, texobj->Target,
- first_image->TexFormat,
- 0, levels - 1,
- width, height, depth,
- MAX2(num_samples, 1),
- MIPTREE_CREATE_DEFAULT);
-
- if (intel_texobj->mt == NULL) {
- return false;
- }
- }
-
- for (face = 0; face < numFaces; face++) {
- for (level = 0; level < levels; level++) {
- struct gl_texture_image *image = texobj->Image[face][level];
- struct brw_texture_image *intel_image = brw_texture_image(image);
-
- image->NumSamples = num_samples;
-
- _swrast_free_texture_image_buffer(ctx, image);
- if (!_swrast_init_texture_image(image))
- return false;
-
- brw_miptree_reference(&intel_image->mt, intel_texobj->mt);
- }
- }
-
- /* The miptree is in a validated state, so no need to check later. */
- intel_texobj->needs_validate = false;
- intel_texobj->validated_first_level = 0;
- intel_texobj->validated_last_level = levels - 1;
- intel_texobj->_Format = first_image->TexFormat;
-
- return true;
-}
-
-
-static void
-brw_free_texture_image_buffer(struct gl_context * ctx,
- struct gl_texture_image *texImage)
-{
- struct brw_texture_image *brw_image = brw_texture_image(texImage);
-
- DBG("%s\n", __func__);
-
- brw_miptree_release(&brw_image->mt);
-
- _swrast_free_texture_image_buffer(ctx, texImage);
-}
-
-/**
- * Map texture memory/buffer into user space.
- * Note: the region of interest parameters are ignored here.
- * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
- * \param mapOut returns start of mapping of region of interest
- * \param rowStrideOut returns row stride in bytes
- */
-static void
-brw_map_texture_image(struct gl_context *ctx,
- struct gl_texture_image *tex_image,
- GLuint slice,
- GLuint x, GLuint y, GLuint w, GLuint h,
- GLbitfield mode,
- GLubyte **map,
- GLint *out_stride)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_texture_image *intel_image = brw_texture_image(tex_image);
- struct brw_mipmap_tree *mt = intel_image->mt;
- ptrdiff_t stride;
-
- /* Our texture data is always stored in a miptree. */
- assert(mt);
-
- /* Check that our caller wasn't confused about how to map a 1D texture. */
- assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || h == 1);
-
- /* brw_miptree_map operates on a unified "slice" number that references the
- * cube face, since it's all just slices to the miptree code.
- */
- if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
- slice = tex_image->Face;
-
- brw_miptree_map(brw, mt,
- tex_image->Level + tex_image->TexObject->Attrib.MinLevel,
- slice + tex_image->TexObject->Attrib.MinLayer,
- x, y, w, h, mode,
- (void **)map, &stride);
-
- *out_stride = stride;
-}
-
-static void
-brw_unmap_texture_image(struct gl_context *ctx,
- struct gl_texture_image *tex_image, GLuint slice)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_texture_image *intel_image = brw_texture_image(tex_image);
- struct brw_mipmap_tree *mt = intel_image->mt;
-
- if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
- slice = tex_image->Face;
-
- brw_miptree_unmap(brw, mt,
- tex_image->Level + tex_image->TexObject->Attrib.MinLevel,
- slice + tex_image->TexObject->Attrib.MinLayer);
-}
-
-static GLboolean
-brw_texture_view(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- struct gl_texture_object *origTexObj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_texture_object *intel_tex = brw_texture_object(texObj);
- struct brw_texture_object *intel_orig_tex = brw_texture_object(origTexObj);
-
- assert(intel_orig_tex->mt);
- brw_miptree_reference(&intel_tex->mt, intel_orig_tex->mt);
-
- /* Since we can only make views of immutable-format textures,
- * we can assume that everything is in origTexObj's miptree.
- *
- * Mesa core has already made us a copy of all the teximage objects,
- * except it hasn't copied our mt pointers, etc.
- */
- const int numFaces = _mesa_num_tex_faces(texObj->Target);
- const int numLevels = texObj->Attrib.NumLevels;
-
- int face;
- int level;
-
- for (face = 0; face < numFaces; face++) {
- for (level = 0; level < numLevels; level++) {
- struct gl_texture_image *image = texObj->Image[face][level];
- struct brw_texture_image *intel_image = brw_texture_image(image);
-
- brw_miptree_reference(&intel_image->mt, intel_orig_tex->mt);
- }
- }
-
- /* The miptree is in a validated state, so no need to check later. */
- intel_tex->needs_validate = false;
- intel_tex->validated_first_level = 0;
- intel_tex->validated_last_level = numLevels - 1;
-
- /* Set the validated texture format, with the same adjustments that
- * would have been applied to determine the underlying texture's
- * mt->format.
- */
- intel_tex->_Format = brw_depth_format_for_depthstencil_format(
- brw_lower_compressed_format(brw, texObj->Image[0][0]->TexFormat));
-
- return GL_TRUE;
-}
-
-static void
-brw_texture_barrier(struct gl_context *ctx)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver >= 6) {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_CS_STALL);
-
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
- } else {
- brw_emit_mi_flush(brw);
- }
-}
-
-/* Return the usual surface usage flags for the given format. */
-static isl_surf_usage_flags_t
-isl_surf_usage(mesa_format format)
-{
- switch(_mesa_get_format_base_format(format)) {
- case GL_DEPTH_COMPONENT:
- return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
- case GL_DEPTH_STENCIL:
- return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
- ISL_SURF_USAGE_TEXTURE_BIT;
- case GL_STENCIL_INDEX:
- return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
- default:
- return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
- }
-}
-
-static GLboolean
-intel_texture_for_memory_object(struct gl_context *ctx,
- struct gl_texture_object *tex_obj,
- struct gl_memory_object *mem_obj,
- GLsizei levels, GLsizei width,
- GLsizei height, GLsizei depth,
- GLuint64 offset)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_memory_object *intel_memobj = brw_memory_object(mem_obj);
- struct brw_texture_object *intel_texobj = brw_texture_object(tex_obj);
- struct gl_texture_image *image = tex_obj->Image[0][0];
- struct isl_surf surf;
-
- /* Only color formats are supported. */
- if (!_mesa_is_format_color_format(image->TexFormat))
- return GL_FALSE;
-
- isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK;
- if (tex_obj->TextureTiling == GL_LINEAR_TILING_EXT)
- tiling_flags = ISL_TILING_LINEAR_BIT;
-
- UNUSED const bool isl_surf_created_successfully =
- isl_surf_init(&brw->screen->isl_dev, &surf,
- .dim = get_isl_surf_dim(tex_obj->Target),
- .format = brw_isl_format_for_mesa_format(image->TexFormat),
- .width = width,
- .height = height,
- .depth = depth,
- .levels = levels,
- .array_len = tex_obj->Target == GL_TEXTURE_3D ? 1 : depth,
- .samples = MAX2(image->NumSamples, 1),
- .usage = isl_surf_usage(image->TexFormat),
- .tiling_flags = tiling_flags);
-
- assert(isl_surf_created_successfully);
-
- intel_texobj->mt = brw_miptree_create_for_bo(brw,
- intel_memobj->bo,
- image->TexFormat,
- offset,
- width,
- height,
- depth,
- surf.row_pitch_B,
- surf.tiling,
- MIPTREE_CREATE_NO_AUX);
- assert(intel_texobj->mt);
- brw_alloc_texture_image_buffer(ctx, image);
-
- intel_texobj->needs_validate = false;
- intel_texobj->validated_first_level = 0;
- intel_texobj->validated_last_level = levels - 1;
- intel_texobj->_Format = image->TexFormat;
-
- return GL_TRUE;
-}
-
-void
-brw_init_texture_functions(struct dd_function_table *functions)
-{
- functions->NewTextureObject = brw_new_texture_object;
- functions->NewTextureImage = brw_new_texture_image;
- functions->DeleteTextureImage = brw_delete_texture_image;
- functions->DeleteTexture = brw_delete_texture_object;
- functions->AllocTextureImageBuffer = brw_alloc_texture_image_buffer;
- functions->FreeTextureImageBuffer = brw_free_texture_image_buffer;
- functions->AllocTextureStorage = brw_alloc_texture_storage;
- functions->MapTextureImage = brw_map_texture_image;
- functions->UnmapTextureImage = brw_unmap_texture_image;
- functions->TextureView = brw_texture_view;
- functions->TextureBarrier = brw_texture_barrier;
- functions->SetTextureStorageForMemoryObject = intel_texture_for_memory_object;
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef INTELTEX_INC
-#define INTELTEX_INC
-
-#include "main/mtypes.h"
-#include "main/formats.h"
-#include "brw_context.h"
-#include "brw_mipmap_tree.h"
-
-void brw_init_texture_functions(struct dd_function_table *functions);
-
-void brw_init_texture_image_functions(struct dd_function_table *functions);
-
-void brw_init_texture_copy_image_functions(struct dd_function_table *functs);
-
-void brw_init_copy_image_functions(struct dd_function_table *functions);
-
-void brw_set_texbuffer(__DRIcontext *pDRICtx,
- GLint target, __DRIdrawable *pDraw);
-void brw_set_texbuffer2(__DRIcontext *pDRICtx,
- GLint target, GLint format, __DRIdrawable *pDraw);
-void brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target,
- __DRIdrawable *dPriv);
-
-struct brw_mipmap_tree *
-brw_miptree_create_for_teximage(struct brw_context *brw,
- struct brw_texture_object *brw_obj,
- struct brw_texture_image *brw_image,
- enum brw_miptree_create_flags flags);
-
-void brw_finalize_mipmap_tree(struct brw_context *brw,
- struct gl_texture_object *tex_obj);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/teximage.h"
-#include "main/texobj.h"
-#include "main/texstate.h"
-#include "main/fbobject.h"
-
-#include "drivers/common/meta.h"
-
-#include "brw_screen.h"
-#include "brw_mipmap_tree.h"
-#include "brw_fbo.h"
-#include "brw_tex.h"
-#include "brw_context.h"
-
-#define FILE_DEBUG_FLAG DEBUG_TEXTURE
-
-
-static void
-brw_copytexsubimage(struct gl_context *ctx, GLuint dims,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset, GLint slice,
- struct gl_renderbuffer *rb,
- GLint x, GLint y,
- GLsizei width, GLsizei height)
-{
- struct brw_context *brw = brw_context(ctx);
-
- /* Try BLORP first. It can handle almost everything. */
- if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
- xoffset, yoffset, width, height))
- return;
-
- /* Finally, fall back to meta. This will likely be slow. */
- perf_debug("%s - fallback to swrast\n", __func__);
- _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
- xoffset, yoffset, slice,
- rb, x, y, width, height);
-}
-
-
-void
-brw_init_texture_copy_image_functions(struct dd_function_table *functions)
-{
- functions->CopyTexSubImage = brw_copytexsubimage;
-}
+++ /dev/null
-
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/enums.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/formats.h"
-#include "main/glformats.h"
-#include "main/image.h"
-#include "main/pbo.h"
-#include "main/renderbuffer.h"
-#include "main/texcompress.h"
-#include "main/texgetimage.h"
-#include "main/texobj.h"
-#include "main/teximage.h"
-#include "main/texstore.h"
-#include "main/glthread.h"
-
-#include "drivers/common/meta.h"
-
-#include "brw_mipmap_tree.h"
-#include "brw_buffer_objects.h"
-#include "brw_batch.h"
-#include "brw_tex.h"
-#include "brw_fbo.h"
-#include "brw_image.h"
-#include "brw_context.h"
-#include "brw_blorp.h"
-
-#define FILE_DEBUG_FLAG DEBUG_TEXTURE
-
-/* Make sure one doesn't end up shrinking base level zero unnecessarily.
- * Deriving the base level dimension by shifting a higher level dimension
- * gives an off-by-one value when the base level has an NPOT size (for
- * example, 293 != 146 << 1).
- * Choose the original base level dimension when the shifted dimensions agree.
- * Otherwise assume a real resize is intended and use the new shifted value.
- */
-static unsigned
-get_base_dim(unsigned old_base_dim, unsigned new_level_dim, unsigned level)
-{
- const unsigned old_level_dim = old_base_dim >> level;
- const unsigned new_base_dim = new_level_dim << level;
-
- return old_level_dim == new_level_dim ? old_base_dim : new_base_dim;
-}
-
-/* Work back from the specified level of the image to the baselevel and create a
- * miptree of that size.
- */
-struct brw_mipmap_tree *
-brw_miptree_create_for_teximage(struct brw_context *brw,
- struct brw_texture_object *brw_obj,
- struct brw_texture_image *brw_image,
- enum brw_miptree_create_flags flags)
-{
- GLuint lastLevel;
- int width, height, depth;
- unsigned old_width = 0, old_height = 0, old_depth = 0;
- const struct brw_mipmap_tree *old_mt = brw_obj->mt;
- const unsigned level = brw_image->base.Base.Level;
-
- brw_get_image_dims(&brw_image->base.Base, &width, &height, &depth);
-
- if (old_mt) {
- old_width = old_mt->surf.logical_level0_px.width;
- old_height = old_mt->surf.logical_level0_px.height;
- old_depth = old_mt->surf.dim == ISL_SURF_DIM_3D ?
- old_mt->surf.logical_level0_px.depth :
- old_mt->surf.logical_level0_px.array_len;
- }
-
- DBG("%s\n", __func__);
-
- /* Figure out image dimensions at start level. */
- switch(brw_obj->base.Target) {
- case GL_TEXTURE_2D_MULTISAMPLE:
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- case GL_TEXTURE_RECTANGLE:
- case GL_TEXTURE_EXTERNAL_OES:
- assert(level == 0);
- break;
- case GL_TEXTURE_3D:
- depth = old_mt ? get_base_dim(old_depth, depth, level) :
- depth << level;
- FALLTHROUGH;
- case GL_TEXTURE_2D:
- case GL_TEXTURE_2D_ARRAY:
- case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_ARRAY:
- height = old_mt ? get_base_dim(old_height, height, level) :
- height << level;
- FALLTHROUGH;
- case GL_TEXTURE_1D:
- case GL_TEXTURE_1D_ARRAY:
- width = old_mt ? get_base_dim(old_width, width, level) :
- width << level;
- break;
- default:
- unreachable("Unexpected target");
- }
-
- /* Guess a reasonable value for lastLevel. This is probably going
- * to be wrong fairly often and might mean that we have to look at
- * resizable buffers, or require that buffers implement lazy
- * pagetable arrangements.
- */
- if ((brw_obj->base.Sampler.Attrib.MinFilter == GL_NEAREST ||
- brw_obj->base.Sampler.Attrib.MinFilter == GL_LINEAR) &&
- brw_image->base.Base.Level == 0 &&
- !brw_obj->base.Attrib.GenerateMipmap) {
- lastLevel = 0;
- } else {
- lastLevel = _mesa_get_tex_max_num_levels(brw_obj->base.Target,
- width, height, depth) - 1;
- }
-
- return brw_miptree_create(brw,
- brw_obj->base.Target,
- brw_image->base.Base.TexFormat,
- 0,
- lastLevel,
- width,
- height,
- depth,
- MAX2(brw_image->base.Base.NumSamples, 1),
- flags);
-}
-
-static bool
-brw_texsubimage_blorp(struct brw_context *brw, GLuint dims,
- struct gl_texture_image *tex_image,
- unsigned x, unsigned y, unsigned z,
- unsigned width, unsigned height, unsigned depth,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- struct brw_texture_image *intel_image = brw_texture_image(tex_image);
- const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel;
- const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z;
-
- /* The blorp path can't understand crazy format hackery */
- if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
- _mesa_get_format_base_format(tex_image->TexFormat))
- return false;
-
- return brw_blorp_upload_miptree(brw, intel_image->mt, tex_image->TexFormat,
- mt_level, x, y, mt_z, width, height, depth,
- tex_image->TexObject->Target, format, type,
- pixels, packing);
-}
-
-/**
- * \brief A fast path for glTexImage and glTexSubImage.
- *
- * This fast path is taken when the texture format is BGRA, RGBA,
- * A or L and when the texture memory is X- or Y-tiled. It uploads
- * the texture data by mapping the texture memory without a GTT fence, thus
- * acquiring a tiled view of the memory, and then copying sucessive
- * spans within each tile.
- *
- * This is a performance win over the conventional texture upload path because
- * it avoids the performance penalty of writing through the write-combine
- * buffer. In the conventional texture upload path,
- * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT
- * fence, thus acquiring a linear view of the memory, then each row in the
- * image is memcpy'd. In this fast path, we replace each row's copy with
- * a sequence of copies over each linear span in the tile.
- *
- * One use case is Google Chrome's paint rectangles. Chrome (as
- * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures.
- * Each page's content is initially uploaded with glTexImage2D and damaged
- * regions are updated with glTexSubImage2D. On some workloads, the
- * performance gain of this fastpath on Sandybridge is over 5x.
- */
-static bool
-brw_texsubimage_tiled_memcpy(struct gl_context * ctx,
- GLuint dims,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type,
- const GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_texture_image *image = brw_texture_image(texImage);
- int src_pitch;
-
- /* The miptree's buffer. */
- struct brw_bo *bo;
-
- uint32_t cpp;
- isl_memcpy_type copy_type;
-
- /* This fastpath is restricted to specific texture types:
- * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
- * more types.
- *
- * FINISHME: The restrictions below on packing alignment and packing row
- * length are likely unneeded now because we calculate the source stride
- * with _mesa_image_row_stride. However, before removing the restrictions
- * we need tests.
- */
- if (!devinfo->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- !(texImage->TexObject->Target == GL_TEXTURE_2D ||
- texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
- pixels == NULL ||
- packing->BufferObj ||
- packing->Alignment > 4 ||
- packing->SkipPixels > 0 ||
- packing->SkipRows > 0 ||
- (packing->RowLength != 0 && packing->RowLength != width) ||
- packing->SwapBytes ||
- packing->LsbFirst ||
- packing->Invert)
- return false;
-
- /* Only a simple blit, no scale, bias or other mapping. */
- if (ctx->_ImageTransferState)
- return false;
-
- copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
- &cpp);
- if (copy_type == ISL_MEMCPY_INVALID)
- return false;
-
- /* If this is a nontrivial texture view, let another path handle it instead. */
- if (texImage->TexObject->Attrib.MinLayer)
- return false;
-
- if (!image->mt ||
- (image->mt->surf.tiling != ISL_TILING_X &&
- image->mt->surf.tiling != ISL_TILING_Y0)) {
- /* The algorithm is written only for X- or Y-tiled memory. */
- return false;
- }
-
- /* linear_to_tiled() assumes that if the object is swizzled, it is using
- * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
- * true on gfx5 and above.
- *
- * The killer on top is that some gfx4 have an L-shaped swizzle mode, where
- * parts of the memory aren't swizzled at all. Userspace just can't handle
- * that.
- */
- if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
- return false;
-
- int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
-
- /* Since we are going to write raw data to the miptree, we need to resolve
- * any pending fast color clears before we start.
- */
- assert(image->mt->surf.logical_level0_px.depth == 1);
- assert(image->mt->surf.logical_level0_px.array_len == 1);
-
- brw_miptree_access_raw(brw, image->mt, level, 0, true);
-
- bo = image->mt->bo;
-
- if (brw_batch_references(&brw->batch, bo)) {
- perf_debug("Flushing before mapping a referenced bo.\n");
- brw_batch_flush(brw);
- }
-
- void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW);
- if (map == NULL) {
- DBG("%s: failed to map bo\n", __func__);
- return false;
- }
-
- src_pitch = _mesa_image_row_stride(packing, width, format, type);
-
- /* We postponed printing this message until having committed to executing
- * the function.
- */
- DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
- "mesa_format=0x%x tiling=%d "
- "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) ",
- __func__, texImage->Level, xoffset, yoffset, width, height,
- format, type, texImage->TexFormat, image->mt->surf.tiling,
- packing->Alignment, packing->RowLength, packing->SkipPixels,
- packing->SkipRows);
-
- /* Adjust x and y offset based on miplevel */
- unsigned level_x, level_y;
- brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
- xoffset += level_x;
- yoffset += level_y;
-
- isl_memcpy_linear_to_tiled(
- xoffset * cpp, (xoffset + width) * cpp,
- yoffset, yoffset + height,
- map,
- pixels,
- image->mt->surf.row_pitch_B, src_pitch,
- devinfo->has_bit6_swizzle,
- image->mt->surf.tiling,
- copy_type
- );
-
- brw_bo_unmap(bo);
- return true;
-}
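/* A minimal sketch of what "a tiled view of the memory" means above, for the
 * classic X-tiling layout (4 KiB tiles of 512 bytes x 8 rows).  This is an
 * illustration only, not the driver's isl_memcpy_linear_to_tiled(); it
 * ignores bit-6 swizzling and Y-tiling.  The helper name is made up.
 */
static inline uint32_t
example_xtile_offset(uint32_t x_B, uint32_t y, uint32_t row_pitch_B)
{
   const uint32_t tile_w_B = 512, tile_h = 8, tile_size_B = 4096;
   const uint32_t tiles_per_row = row_pitch_B / tile_w_B;
   const uint32_t tile_idx = (y / tile_h) * tiles_per_row + (x_B / tile_w_B);

   /* Within one X tile, 512-byte rows are stored contiguously. */
   return tile_idx * tile_size_B + (y % tile_h) * tile_w_B + (x_B % tile_w_B);
}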
-
-
-static void
-brw_upload_tex(struct gl_context * ctx,
- GLuint dims,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_mipmap_tree *mt = brw_texture_image(texImage)->mt;
- bool ok;
-
- /* Check that there is actually data to store. */
- if (pixels == NULL && !packing->BufferObj)
- return;
-
- bool tex_busy = mt &&
- (brw_batch_references(&brw->batch, mt->bo) || brw_bo_busy(mt->bo));
-
- if (packing->BufferObj || tex_busy ||
- mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
- ok = brw_texsubimage_blorp(brw, dims, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth, format, type,
- pixels, packing);
- if (ok)
- return;
- }
-
- ok = brw_texsubimage_tiled_memcpy(ctx, dims, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, packing);
- if (ok)
- return;
-
- _mesa_store_texsubimage(ctx, dims, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, packing);
-}
-
-
-static void
-brw_teximage(struct gl_context * ctx,
- GLuint dims,
- struct gl_texture_image *texImage,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *unpack)
-{
- DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
- __func__, _mesa_get_format_name(texImage->TexFormat),
- _mesa_enum_to_string(texImage->TexObject->Target),
- _mesa_enum_to_string(format), _mesa_enum_to_string(type),
- texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
-
- /* Allocate storage for texture data. */
- if (!ctx->Driver.AllocTextureImageBuffer(ctx, texImage)) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims);
- return;
- }
-
- assert(brw_texture_image(texImage)->mt);
-
- brw_upload_tex(ctx, dims, texImage, 0, 0, 0,
- texImage->Width, texImage->Height, texImage->Depth,
- format, type, pixels, unpack);
-}
-
-
-static void
-brw_texsubimage(struct gl_context * ctx,
- GLuint dims,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
- __func__, _mesa_get_format_name(texImage->TexFormat),
- _mesa_enum_to_string(texImage->TexObject->Target),
- _mesa_enum_to_string(format), _mesa_enum_to_string(type),
- texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
-
- brw_upload_tex(ctx, dims, texImage, xoffset, yoffset, zoffset,
- width, height, depth, format, type, pixels, packing);
-}
-
-
-static void
-brw_set_texture_image_mt(struct brw_context *brw,
- struct gl_texture_image *image,
- GLenum internal_format,
- mesa_format format,
- struct brw_mipmap_tree *mt)
-
-{
- struct gl_texture_object *texobj = image->TexObject;
- struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
- struct brw_texture_image *intel_image = brw_texture_image(image);
-
- _mesa_init_teximage_fields(&brw->ctx, image,
- mt->surf.logical_level0_px.width,
- mt->surf.logical_level0_px.height, 1,
- 0, internal_format, format);
-
- brw->ctx.Driver.FreeTextureImageBuffer(&brw->ctx, image);
-
- intel_texobj->needs_validate = true;
- intel_image->base.RowStride = mt->surf.row_pitch_B / mt->cpp;
- assert(mt->surf.row_pitch_B % mt->cpp == 0);
-
- brw_miptree_reference(&intel_image->mt, mt);
-
- /* Immediately validate the image to the object. */
- brw_miptree_reference(&intel_texobj->mt, mt);
-}
-
-
-void
-brw_set_texbuffer2(__DRIcontext *pDRICtx, GLint target,
- GLint texture_format,
- __DRIdrawable *dPriv)
-{
- struct gl_framebuffer *fb = dPriv->driverPrivate;
- struct brw_context *brw = pDRICtx->driverPrivate;
- struct gl_context *ctx = &brw->ctx;
- struct brw_renderbuffer *rb;
- struct gl_texture_object *texObj;
- struct gl_texture_image *texImage;
- mesa_format texFormat = MESA_FORMAT_NONE;
- GLenum internal_format = 0;
-
- _mesa_glthread_finish(ctx);
-
- texObj = _mesa_get_current_tex_object(ctx, target);
-
- if (!texObj)
- return;
-
- if (dPriv->lastStamp != dPriv->dri2.stamp ||
- !pDRICtx->driScreenPriv->dri2.useInvalidate)
- brw_update_renderbuffers(pDRICtx, dPriv);
-
- rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
- /* If the miptree isn't set, then intel_update_renderbuffers was unable
- * to get the BO for the drawable from the window system.
- */
- if (!rb || !rb->mt)
- return;
-
- /* Neither the EGL nor the GLX texture_from_pixmap spec says anything about
- * sRGB. They are both from a time when sRGB was considered an extra
- * encoding step you did as part of rendering/blending and not a format.
- * Even though we have concept of sRGB visuals, X has classically assumed
- * that your data is just bits and sRGB rendering is entirely a client-side
- * rendering construct. The assumption is that the result of BindTexImage
- * is a texture with a linear format even if it was rendered with sRGB
- * encoding enabled.
- */
- texFormat = _mesa_get_srgb_format_linear(brw_rb_format(rb));
-
- if (rb->mt->cpp == 4) {
- /* The extra texture_format parameter indicates whether the alpha
- * channel should be respected or ignored. If we set internal_format to
- * GL_RGB, the texture handling code is smart enough to swap the format
- * or apply a swizzle if the underlying format is RGBA so we don't need
- * to stomp it to RGBX or anything like that.
- */
- if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
- internal_format = GL_RGB;
- else
- internal_format = GL_RGBA;
- } else if (rb->mt->cpp == 2) {
- internal_format = GL_RGB;
- }
-
- brw_miptree_finish_external(brw, rb->mt);
-
- _mesa_lock_texture(&brw->ctx, texObj);
- texImage = _mesa_get_tex_image(ctx, texObj, target, 0);
- brw_set_texture_image_mt(brw, texImage, internal_format,
- texFormat, rb->mt);
- _mesa_unlock_texture(&brw->ctx, texObj);
-}
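/* For example, a window-system buffer whose renderbuffer format is
 * MESA_FORMAT_B8G8R8A8_SRGB ends up bound here as MESA_FORMAT_B8G8R8A8_UNORM:
 * _mesa_get_srgb_format_linear() strips the sRGB encoding so the texels are
 * treated as raw bits, matching the historical X semantics described above.
 * (Illustrative format pair; any sRGB renderbuffer format behaves the same.) */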
-
-void
-brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target,
- __DRIdrawable *dPriv)
-{
- struct brw_context *brw = pDRICtx->driverPrivate;
- struct gl_context *ctx = &brw->ctx;
- struct gl_texture_object *tex_obj;
- struct brw_texture_object *intel_tex;
-
- tex_obj = _mesa_get_current_tex_object(ctx, target);
- if (!tex_obj)
- return;
-
- _mesa_lock_texture(&brw->ctx, tex_obj);
-
- intel_tex = brw_texture_object(tex_obj);
- if (!intel_tex->mt) {
- _mesa_unlock_texture(&brw->ctx, tex_obj);
- return;
- }
-
- /* The brw_miptree_prepare_external below as well as the finish_external
- * above in brw_set_texbuffer2 *should* do nothing. The BindTexImage call
- * from both GLX and EGL has TexImage2D and not TexSubImage2D semantics so
- * the texture is not immutable. This means that the user cannot create a
- * texture view of the image with a different format. Since the only three
- * formats available when using BindTexImage are all UNORM, we can never
- * end up with an sRGB format being used for texturing and so we shouldn't
- * get any format-related resolves when texturing from it.
- *
- * While very unlikely, it is possible that the client could use the bound
- * texture with GL_ARB_image_load_store. In that case, we'll do a resolve
- * but that's not actually a problem as it just means that we lose
- * compression on this texture until the next time it's used as a render
- * target.
- *
- * The only other way we could end up with an unexpected aux usage would be
- * if we rendered to the image from the same context as we have it bound as
- * a texture between BindTexImage and ReleaseTexImage. However, the spec
- * clearly calls this case out and says you shouldn't do that. It doesn't
- * explicitly prevent binding the texture to a framebuffer but it says the
- * results of trying to render to it while bound are undefined.
- *
- * Just to keep everything safe and sane, we do a prepare_external but it
- * should be a no-op in almost all cases. On the off chance that someone
- * ever triggers this, we should at least warn them.
- */
- if (intel_tex->mt->aux_buf &&
- brw_miptree_get_aux_state(intel_tex->mt, 0, 0) !=
- isl_drm_modifier_get_default_aux_state(intel_tex->mt->drm_modifier)) {
- _mesa_warning(ctx, "Aux state changed between BindTexImage and "
- "ReleaseTexImage. Most likely someone tried to draw "
- "to the pixmap bound in BindTexImage or used it with "
- "image_load_store.");
- }
-
- brw_miptree_prepare_external(brw, intel_tex->mt);
-
- _mesa_unlock_texture(&brw->ctx, tex_obj);
-}
-
-static GLboolean
-brw_bind_renderbuffer_tex_image(struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- struct gl_texture_image *image)
-{
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- struct brw_texture_image *intel_image = brw_texture_image(image);
- struct gl_texture_object *texobj = image->TexObject;
- struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
-
- /* We can only handle RB allocated with AllocRenderbufferStorage, or
- * window-system renderbuffers.
- */
- assert(!rb->TexImage);
-
- if (!irb->mt)
- return false;
-
- _mesa_lock_texture(ctx, texobj);
- _mesa_init_teximage_fields(ctx, image, rb->Width, rb->Height, 1, 0,
- rb->InternalFormat, rb->Format);
- image->NumSamples = rb->NumSamples;
-
- brw_miptree_reference(&intel_image->mt, irb->mt);
-
- /* Immediately validate the image to the object. */
- brw_miptree_reference(&intel_texobj->mt, intel_image->mt);
-
- intel_texobj->needs_validate = true;
- _mesa_unlock_texture(ctx, texobj);
-
- return true;
-}
-
-void
-brw_set_texbuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
-{
- /* The old interface didn't have the format argument, so copy our
- * implementation's behavior at the time.
- */
- brw_set_texbuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
-}
-
-static void
-brw_image_target_texture(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage,
- GLeglImageOES image_handle,
- bool storage)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_mipmap_tree *mt;
- __DRIscreen *dri_screen = brw->screen->driScrnPriv;
- __DRIimage *image;
-
- image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle,
- dri_screen->loaderPrivate);
- if (image == NULL)
- return;
-
- /* Disallow depth/stencil textures: we don't have a way to pass the
- * separate stencil miptree of a GL_DEPTH_STENCIL texture through.
- */
- if (image->has_depthstencil) {
- _mesa_error(ctx, GL_INVALID_OPERATION, __func__);
- return;
- }
-
- mt = brw_miptree_create_for_dri_image(brw, image, target, image->format,
- false);
- if (mt == NULL)
- return;
-
- struct brw_texture_object *intel_texobj = brw_texture_object(texObj);
- intel_texobj->planar_format = image->planar_format;
- intel_texobj->yuv_color_space = image->yuv_color_space;
-
- GLenum internal_format =
- image->internal_format != 0 ?
- image->internal_format : _mesa_get_format_base_format(mt->format);
-
- /* Fix the internal format when _mesa_get_format_base_format(mt->format)
- * isn't a valid one for that particular format.
- */
- if (brw->mesa_format_supports_render[image->format]) {
- if (image->format == MESA_FORMAT_R10G10B10A2_UNORM ||
- image->format == MESA_FORMAT_R10G10B10X2_UNORM ||
- image->format == MESA_FORMAT_B10G10R10A2_UNORM ||
- image->format == MESA_FORMAT_B10G10R10X2_UNORM)
- internal_format = GL_RGB10_A2;
- }
-
- /* Guess sized internal format for dma-bufs, as specified by
- * EXT_EGL_image_storage.
- */
- if (storage && target == GL_TEXTURE_2D && image->imported_dmabuf) {
- internal_format = driGLFormatToSizedInternalGLFormat(image->format);
- if (internal_format == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION, __func__);
- return;
- }
- }
-
- brw_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt);
- brw_miptree_release(&mt);
-}
-
-static void
-brw_image_target_texture_2d(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage,
- GLeglImageOES image_handle)
-{
- brw_image_target_texture(ctx, target, texObj, texImage, image_handle,
- false);
-}
-
-static void
-brw_image_target_tex_storage(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage,
- GLeglImageOES image_handle)
-{
- struct brw_texture_object *intel_texobj = brw_texture_object(texObj);
- brw_image_target_texture(ctx, target, texObj, texImage, image_handle,
- true);
-
- /* The miptree is in a validated state, so no need to check later. */
- intel_texobj->needs_validate = false;
- intel_texobj->validated_first_level = 0;
- intel_texobj->validated_last_level = 0;
- intel_texobj->_Format = texImage->TexFormat;
-}
-
-static bool
-brw_gettexsubimage_blorp(struct brw_context *brw,
- struct gl_texture_image *tex_image,
- unsigned x, unsigned y, unsigned z,
- unsigned width, unsigned height, unsigned depth,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- struct brw_texture_image *intel_image = brw_texture_image(tex_image);
- const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel;
- const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z;
-
- /* The blorp path can't understand crazy format hackery */
- if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
- _mesa_get_format_base_format(tex_image->TexFormat))
- return false;
-
- return brw_blorp_download_miptree(brw, intel_image->mt,
- tex_image->TexFormat, SWIZZLE_XYZW,
- mt_level, x, y, mt_z,
- width, height, depth,
- tex_image->TexObject->Target,
- format, type, false, pixels, packing);
-}
-
-/**
- * \brief A fast path for glGetTexImage.
- *
- * \see brw_readpixels_tiled_memcpy()
- */
-static bool
-brw_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- GLvoid *pixels,
- const struct gl_pixelstore_attrib *packing)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_texture_image *image = brw_texture_image(texImage);
- int dst_pitch;
-
- /* The miptree's buffer. */
- struct brw_bo *bo;
-
- uint32_t cpp;
- isl_memcpy_type copy_type;
-
- /* This fastpath is restricted to specific texture types:
- * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
- * more types.
- *
- * FINISHME: The restrictions below on packing alignment and packing row
- * length are likely unneeded now because we calculate the destination stride
- * with _mesa_image_row_stride. However, before removing the restrictions
- * we need tests.
- */
- if (!devinfo->has_llc ||
- !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
- !(texImage->TexObject->Target == GL_TEXTURE_2D ||
- texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
- pixels == NULL ||
- packing->BufferObj ||
- packing->Alignment > 4 ||
- packing->SkipPixels > 0 ||
- packing->SkipRows > 0 ||
- (packing->RowLength != 0 && packing->RowLength != width) ||
- packing->SwapBytes ||
- packing->LsbFirst ||
- packing->Invert)
- return false;
-
- /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
- * function doesn't set the last channel to 1. Note this checks BaseFormat
- * rather than TexFormat in case the RGBX format is being simulated with an
- * RGBA format.
- */
- if (texImage->_BaseFormat == GL_RGB)
- return false;
-
- copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
- &cpp);
- if (copy_type == ISL_MEMCPY_INVALID)
- return false;
-
- /* If this is a nontrivial texture view, let another path handle it instead. */
- if (texImage->TexObject->Attrib.MinLayer)
- return false;
-
- if (!image->mt ||
- (image->mt->surf.tiling != ISL_TILING_X &&
- image->mt->surf.tiling != ISL_TILING_Y0)) {
- /* The algorithm is written only for X- or Y-tiled memory. */
- return false;
- }
-
- /* tiled_to_linear() assumes that if the object is swizzled, it is using
- * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
- * true on gfx5 and above.
- *
- * The killer on top is that some gfx4 have an L-shaped swizzle mode, where
- * parts of the memory aren't swizzled at all. Userspace just can't handle
- * that.
- */
- if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
- return false;
-
- int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
-
- /* Since we are going to write raw data to the miptree, we need to resolve
- * any pending fast color clears before we start.
- */
- assert(image->mt->surf.logical_level0_px.depth == 1);
- assert(image->mt->surf.logical_level0_px.array_len == 1);
-
- brw_miptree_access_raw(brw, image->mt, level, 0, true);
-
- bo = image->mt->bo;
-
- if (brw_batch_references(&brw->batch, bo)) {
- perf_debug("Flushing before mapping a referenced bo.\n");
- brw_batch_flush(brw);
- }
-
- void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
- if (map == NULL) {
- DBG("%s: failed to map bo\n", __func__);
- return false;
- }
-
- dst_pitch = _mesa_image_row_stride(packing, width, format, type);
-
- DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
- "mesa_format=0x%x tiling=%d "
- "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
- __func__, texImage->Level, xoffset, yoffset, width, height,
- format, type, texImage->TexFormat, image->mt->surf.tiling,
- packing->Alignment, packing->RowLength, packing->SkipPixels,
- packing->SkipRows);
-
- /* Adjust x and y offset based on miplevel */
- unsigned level_x, level_y;
- brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
- xoffset += level_x;
- yoffset += level_y;
-
- isl_memcpy_tiled_to_linear(
- xoffset * cpp, (xoffset + width) * cpp,
- yoffset, yoffset + height,
- pixels,
- map,
- dst_pitch, image->mt->surf.row_pitch_B,
- devinfo->has_bit6_swizzle,
- image->mt->surf.tiling,
- copy_type
- );
-
- brw_bo_unmap(bo);
- return true;
-}
-
-static void
-brw_get_tex_sub_image(struct gl_context *ctx,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLint depth,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage)
-{
- struct brw_context *brw = brw_context(ctx);
- bool ok;
-
- DBG("%s\n", __func__);
-
- if (ctx->Pack.BufferObj) {
- if (brw_gettexsubimage_blorp(brw, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth, format, type,
- pixels, &ctx->Pack))
- return;
-
- perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
- }
-
- ok = brw_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
- width, height,
- format, type, pixels, &ctx->Pack);
-
- if (ok)
- return;
-
- _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, texImage);
-
- DBG("%s - DONE\n", __func__);
-}
-
-static void
-flush_astc_denorms(struct gl_context *ctx, GLuint dims,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth)
-{
- struct compressed_pixelstore store;
- _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat,
- width, height, depth,
- &ctx->Unpack, &store);
-
- for (int slice = 0; slice < store.CopySlices; slice++) {
-
- /* Map dest texture buffer */
- GLubyte *dstMap;
- GLint dstRowStride;
- ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
- xoffset, yoffset, width, height,
- GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
- &dstMap, &dstRowStride);
- if (!dstMap)
- continue;
-
- for (int i = 0; i < store.CopyRowsPerSlice; i++) {
-
- /* An ASTC block is stored in little endian mode. The byte that
- * contains bits 0..7 is stored at the lower address in memory.
- */
- struct astc_void_extent {
- uint16_t header : 12;
- uint16_t dontcare[3];
- uint16_t R;
- uint16_t G;
- uint16_t B;
- uint16_t A;
- } *blocks = (struct astc_void_extent*) dstMap;
-
- /* Iterate over every copied block in the row */
- for (int j = 0; j < store.CopyBytesPerRow / 16; j++) {
-
- /* Check if the header matches that of an LDR void-extent block */
- if (blocks[j].header == 0xDFC) {
-
- /* Flush UNORM16 values that would be denormalized */
- if (blocks[j].A < 4) blocks[j].A = 0;
- if (blocks[j].B < 4) blocks[j].B = 0;
- if (blocks[j].G < 4) blocks[j].G = 0;
- if (blocks[j].R < 4) blocks[j].R = 0;
- }
- }
-
- dstMap += dstRowStride;
- }
-
- ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
- }
-}
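/* Why the threshold of 4 above (presumably): the UNORM16 void-extent color is
 * decoded to FP16 by the sampler, and 4/65535 ~= 6.1e-5 ~= 2^-14, the smallest
 * normal FP16 value, so UNORM16 values 1..3 decode to FP16 denormals that the
 * affected GFX9 parts flush to zero.  Pre-flushing them on the CPU keeps CPU
 * and GPU decode results consistent. */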
-
-
-static void
-brw_compressedtexsubimage(struct gl_context *ctx, GLuint dims,
- struct gl_texture_image *texImage,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format,
- GLsizei imageSize, const GLvoid *data)
-{
- /* Upload the compressed data blocks */
- _mesa_store_compressed_texsubimage(ctx, dims, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth,
- format, imageSize, data);
-
- /* Fix up copied ASTC blocks if necessary */
- GLenum gl_format = _mesa_compressed_format_to_glenum(ctx,
- texImage->TexFormat);
- bool is_linear_astc = _mesa_is_astc_format(gl_format) &&
- !_mesa_is_srgb_format(gl_format);
- struct brw_context *brw = (struct brw_context*) ctx;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- if (devinfo->ver == 9 &&
- !intel_device_info_is_9lp(devinfo) &&
- is_linear_astc)
- flush_astc_denorms(ctx, dims, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth);
-}
-
-void
-brw_init_texture_image_functions(struct dd_function_table *functions)
-{
- functions->TexImage = brw_teximage;
- functions->TexSubImage = brw_texsubimage;
- functions->CompressedTexSubImage = brw_compressedtexsubimage;
- functions->EGLImageTargetTexture2D = brw_image_target_texture_2d;
- functions->EGLImageTargetTexStorage = brw_image_target_tex_storage;
- functions->BindRenderbufferTexImage = brw_bind_renderbuffer_tex_image;
- functions->GetTexSubImage = brw_get_tex_sub_image;
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _BRW_TEX_OBJ_H
-#define _BRW_TEX_OBJ_H
-
-#include "swrast/s_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_texture_object
-{
- struct gl_texture_object base;
-
- /* This is a mirror of base._MaxLevel, updated at validate time,
- * except that we don't bother with the non-base levels for
- * non-mipmapped textures.
- */
- unsigned int _MaxLevel;
-
- unsigned int validated_first_level;
- unsigned int validated_last_level;
-
- /* The miptree of pixel data for the texture (if !needs_validate). After
- * validation, the images will also have references to the same mt.
- */
- struct brw_mipmap_tree *mt;
-
- /**
- * Set when mipmap trees in the texture images of this texture object
- * might not all be the mipmap tree above.
- */
- bool needs_validate;
-
- /* Mesa format for the validated texture object. For non-views this
- * will always be the same as texObj->Image[0][0].TexFormat. For views, it
- * may differ since the mt is shared across views with differing formats.
- */
- mesa_format _Format;
-
- const struct brw_image_format *planar_format;
- unsigned int yuv_color_space;
-};
-
-
-/**
- * brw_texture_image is a subclass of swrast_texture_image because we
- * sometimes fall back to using the swrast module for software rendering.
- */
-struct brw_texture_image
-{
- struct swrast_texture_image base;
-
- /* If brw_image->mt != NULL, image data is stored here.
- * Else if brw_image->base.Buffer != NULL, image is stored there.
- * Else there is no image data.
- */
- struct brw_mipmap_tree *mt;
-};
-
-static inline struct brw_texture_object *
-brw_texture_object(struct gl_texture_object *obj)
-{
- return (struct brw_texture_object *) obj;
-}
-
-static inline struct brw_texture_image *
-brw_texture_image(struct gl_texture_image *img)
-{
- return (struct brw_texture_image *) img;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BRW_TEX_OBJ_H */
+++ /dev/null
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/samplerobj.h"
-#include "main/teximage.h"
-#include "main/texobj.h"
-
-#include "brw_context.h"
-#include "brw_mipmap_tree.h"
-#include "brw_tex.h"
-
-#define FILE_DEBUG_FLAG DEBUG_TEXTURE
-
-/**
- * Sets our driver-specific variant of tObj->_MaxLevel for later surface state
- * upload.
- *
- * If we're only ensuring that there is storage for the first miplevel of a
- * texture, then in texture setup we're going to have to make sure we don't
- * allow sampling beyond level 0.
- */
-static void
-brw_update_max_level(struct gl_texture_object *tObj,
- struct gl_sampler_object *sampler)
-{
- struct brw_texture_object *brw_obj = brw_texture_object(tObj);
-
- if (!tObj->_MipmapComplete ||
- (tObj->_RenderToTexture &&
- (sampler->Attrib.MinFilter == GL_NEAREST ||
- sampler->Attrib.MinFilter == GL_LINEAR))) {
- brw_obj->_MaxLevel = tObj->Attrib.BaseLevel;
- } else {
- brw_obj->_MaxLevel = tObj->_MaxLevel;
- }
-}
-
-/**
- * At rendering-from-a-texture time, make sure that the texture object has a
- * miptree that can hold the entire texture based on
- * BaseLevel/MaxLevel/filtering, and copy in any texture images that are
- * stored in other miptrees.
- */
-void
-brw_finalize_mipmap_tree(struct brw_context *brw,
- struct gl_texture_object *tObj)
-{
- struct brw_texture_object *brw_obj = brw_texture_object(tObj);
- GLuint face, i;
- GLuint nr_faces = 0;
- struct brw_texture_image *firstImage;
- int width, height, depth;
-
- /* TBOs require no validation -- they always just point to their BO. */
- if (tObj->Target == GL_TEXTURE_BUFFER)
- return;
-
- /* What levels does this validated texture image require? */
- int validate_first_level = tObj->Attrib.BaseLevel;
- int validate_last_level = brw_obj->_MaxLevel;
-
- /* Skip the loop over images in the common case of no images having
- * changed. But if GL_BASE_LEVEL or GL_MAX_LEVEL changes to something we
- * haven't looked at, then we do need to look at those new images.
- */
- if (!brw_obj->needs_validate &&
- validate_first_level >= brw_obj->validated_first_level &&
- validate_last_level <= brw_obj->validated_last_level) {
- return;
- }
-
- /* On recent generations, immutable textures should not get this far
- * -- they should have been created in a validated state, and nothing
- * can invalidate them.
- *
- * Unfortunately, this is not true on pre-Sandybridge hardware -- when
- * rendering into an immutable-format depth texture we may have to rebase
- * the rendered levels to meet alignment requirements.
- *
- * FINISHME: Avoid doing this.
- */
- assert(!tObj->Immutable || brw->screen->devinfo.ver < 6);
-
- firstImage = brw_texture_image(tObj->Image[0][tObj->Attrib.BaseLevel]);
- if (!firstImage)
- return;
-
- /* Check tree can hold all active levels. Check tree matches
- * target, imageFormat, etc.
- */
- if (brw_obj->mt &&
- (!brw_miptree_match_image(brw_obj->mt, &firstImage->base.Base) ||
- validate_first_level < brw_obj->mt->first_level ||
- validate_last_level > brw_obj->mt->last_level)) {
- brw_miptree_release(&brw_obj->mt);
- }
-
-
- /* May need to create a new tree:
- */
- if (!brw_obj->mt) {
- const unsigned level = firstImage->base.Base.Level;
- brw_get_image_dims(&firstImage->base.Base, &width, &height, &depth);
- /* Figure out image dimensions at start level. */
- switch(brw_obj->base.Target) {
- case GL_TEXTURE_2D_MULTISAMPLE:
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- case GL_TEXTURE_RECTANGLE:
- case GL_TEXTURE_EXTERNAL_OES:
- assert(level == 0);
- break;
- case GL_TEXTURE_3D:
- depth = depth << level;
- FALLTHROUGH;
- case GL_TEXTURE_2D:
- case GL_TEXTURE_2D_ARRAY:
- case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_ARRAY:
- height = height << level;
- FALLTHROUGH;
- case GL_TEXTURE_1D:
- case GL_TEXTURE_1D_ARRAY:
- width = width << level;
- break;
- default:
- unreachable("Unexpected target");
- }
- perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle "
- "finalized texture miptree.\n",
- _mesa_get_format_name(firstImage->base.Base.TexFormat),
- width, height, depth, validate_last_level + 1);
-
- brw_obj->mt = brw_miptree_create(brw,
- brw_obj->base.Target,
- firstImage->base.Base.TexFormat,
- 0, /* first_level */
- validate_last_level,
- width,
- height,
- depth,
- 1 /* num_samples */,
- MIPTREE_CREATE_BUSY);
- if (!brw_obj->mt)
- return;
- }
-
- /* Pull in any images not in the object's tree:
- */
- nr_faces = _mesa_num_tex_faces(brw_obj->base.Target);
- for (face = 0; face < nr_faces; face++) {
- for (i = validate_first_level; i <= validate_last_level; i++) {
- struct brw_texture_image *brw_image =
- brw_texture_image(brw_obj->base.Image[face][i]);
- /* skip mipmap levels that are too small to have an image */
- if (brw_image == NULL)
- break;
-
- if (brw_obj->mt != brw_image->mt)
- brw_miptree_copy_teximage(brw, brw_image, brw_obj->mt);
-
- /* After we're done, we'd better agree that our layout is
- * appropriate, or we'll end up hitting this function again on the
- * next draw
- */
- assert(brw_miptree_match_image(brw_obj->mt, &brw_image->base.Base));
- }
- }
-
- brw_obj->validated_first_level = validate_first_level;
- brw_obj->validated_last_level = validate_last_level;
- brw_obj->_Format = firstImage->base.Base.TexFormat;
- brw_obj->needs_validate = false;
-}
-
-/**
- * Finalizes all textures, completing any rendering that needs to be done
- * to prepare them.
- */
-void
-brw_validate_textures(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
-
- for (int unit = 0; unit <= max_enabled_unit; unit++) {
- struct gl_texture_object *tex_obj = ctx->Texture.Unit[unit]._Current;
-
- if (!tex_obj)
- continue;
-
- struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
-
- /* We know that this is true by now, and if it wasn't, we might have
- * mismatched level sizes and the copies would fail.
- */
- assert(tex_obj->_BaseComplete);
-
- brw_update_max_level(tex_obj, sampler);
- brw_finalize_mipmap_tree(brw, tex_obj);
- }
-}
+++ /dev/null
-/*
- * Copyright 2003 VMware, Inc.
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file intel_upload.c
- *
- * Batched upload via BOs.
- */
-
-#include "main/macros.h"
-#include "brw_bufmgr.h"
-#include "brw_context.h"
-#include "brw_buffer_objects.h"
-
-void
-brw_upload_finish(struct brw_uploader *upload)
-{
- assert((upload->bo == NULL) == (upload->map == NULL));
- if (!upload->bo)
- return;
-
- brw_bo_unmap(upload->bo);
- brw_bo_unreference(upload->bo);
- upload->bo = NULL;
- upload->map = NULL;
- upload->next_offset = 0;
-}
-
-/**
- * Interface for getting memory for uploading streamed data to the GPU
- *
- * In most cases, streamed data (for GPU state structures, for example) is
- * uploaded through brw_state_batch(), since that interface allows relocations
- * from the streamed space returned to other BOs. However, that interface has
- * the restriction that the amount of space allocated has to be "small".
- *
- * This interface, on the other hand, is able to handle arbitrary sized
- * allocation requests, though it will batch small allocations into the same
- * BO for efficiency and reduced memory footprint.
- *
- * \note The returned pointer is valid only until brw_upload_finish().
- *
- * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
- * entry, and will have a reference to the new BO containing the state on
- * return.
- *
- * \param out_offset Offset within the buffer object at which the data will land.
- */
-void *
-brw_upload_space(struct brw_uploader *upload,
- uint32_t size,
- uint32_t alignment,
- struct brw_bo **out_bo,
- uint32_t *out_offset)
-{
- uint32_t offset;
-
- offset = ALIGN_NPOT(upload->next_offset, alignment);
- if (upload->bo && offset + size > upload->bo->size) {
- brw_upload_finish(upload);
- offset = 0;
- }
-
- assert((upload->bo == NULL) == (upload->map == NULL));
- if (!upload->bo) {
- upload->bo = brw_bo_alloc(upload->bufmgr, "streamed data",
- MAX2(upload->default_size, size),
- BRW_MEMZONE_OTHER);
- upload->map = brw_bo_map(NULL, upload->bo,
- MAP_READ | MAP_WRITE |
- MAP_PERSISTENT | MAP_ASYNC);
- }
-
- upload->next_offset = offset + size;
-
- *out_offset = offset;
- if (*out_bo != upload->bo) {
- brw_bo_unreference(*out_bo);
- *out_bo = upload->bo;
- brw_bo_reference(upload->bo);
- }
-
- return upload->map + offset;
-}
-
-/**
- * Handy interface to upload some data to temporary GPU memory quickly.
- *
- * References to this memory should not be retained across batch flushes.
- */
-void
-brw_upload_data(struct brw_uploader *upload,
- const void *data,
- uint32_t size,
- uint32_t alignment,
- struct brw_bo **out_bo,
- uint32_t *out_offset)
-{
- void *dst = brw_upload_space(upload, size, alignment, out_bo, out_offset);
- memcpy(dst, data, size);
-}
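/* Illustrative usage sketch (not from the file itself): stream a small block
 * of per-draw data into the shared upload BO and return where it landed so
 * the caller can emit a relocation.  Assumes an uploader already set up with
 * brw_upload_init(); the helper name and the 64-byte alignment are examples.
 */
static inline void
example_stream_data(struct brw_uploader *upload, const void *data,
                    uint32_t size, struct brw_bo **out_bo, uint32_t *out_offset)
{
   /* 64-byte alignment is just an example; use what the consumer requires. */
   brw_upload_data(upload, data, size, 64, out_bo, out_offset);

   /* *out_bo now holds a reference; the caller unreferences it once the
    * relocation has been emitted and the BO no longer needs to be pinned. */
}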
-
-void
-brw_upload_init(struct brw_uploader *upload,
- struct brw_bufmgr *bufmgr,
- unsigned default_size)
-{
- upload->bufmgr = bufmgr;
- upload->bo = NULL;
- upload->map = NULL;
- upload->next_offset = 0;
- upload->default_size = default_size;
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-
-#include "brw_batch.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-
-#define VS 0
-#define GS 1
-#define CLP 2
-#define SF 3
-#define CS 4
-
-/** @file brw_urb.c
- *
- * Manages the division of the URB space between the various fixed-function
- * units.
- *
- * See the Thread Initiation Management section of the GFX4 B-Spec, and
- * the individual *_STATE structures for restrictions on numbers of
- * entries and threads.
- */
-
-/*
- * Generally, a unit requires a min_nr_entries based on how many entries
- * it produces before the downstream unit gets unblocked and can use and
- * dereference some of its handles.
- *
- * The SF unit preallocates a PUE at the start of thread dispatch, and only
- * uses that one. So it requires one entry per thread.
- *
- * For CLIP, the SF unit will hold the previous primitive while the
- * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
- * (vertices) to ensure continued processing, trifans require 4, and tristrips
- * require 5. There can be 1 or 2 threads, and each has the same requirement.
- *
- * GS has the same requirement as CLIP, but it never handles tristrips,
- * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
- * We only run it single-threaded.
- *
- * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
- * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
- * get streamed down as soon as threads processing earlier vertices get
- * theirs accepted.
- *
- * Each unit will take the number of URB entries we give it (based on the
- * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
- * and brw_curbe.c for the CURBEs) and decide the maximum number of
- * threads it can support based on that, in brw_*_state.c.
- *
- * XXX: Are the min_entry_size numbers useful?
- * XXX: Verify min_nr_entries, esp for VS.
- * XXX: Verify SF min_entry_size.
- */
-static const struct {
- GLuint min_nr_entries;
- GLuint preferred_nr_entries;
- GLuint min_entry_size;
- GLuint max_entry_size;
-} limits[CS+1] = {
- { 16, 32, 1, 5 }, /* vs */
- { 4, 8, 1, 5 }, /* gs */
- { 5, 10, 1, 5 }, /* clp */
- { 1, 8, 1, 12 }, /* sf */
- { 1, 4, 1, 32 } /* cs */
-};
-
-
-static bool check_urb_layout(struct brw_context *brw)
-{
- brw->urb.vs_start = 0;
- brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
- brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
- brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
- brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
-
- return brw->urb.cs_start + brw->urb.nr_cs_entries *
- brw->urb.csize <= brw->urb.size;
-}
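/* Worked example of the layout above (illustrative numbers): with the
 * preferred entry counts from the limits[] table and maximal entry sizes
 * vsize = 5, sfsize = 12, csize = 32, the fences land at
 *
 *    vs_start   = 0
 *    gs_start   = 32 * 5        = 160
 *    clip_start = 160 + 8 * 5   = 200
 *    sf_start   = 200 + 10 * 5  = 250
 *    cs_start   = 250 + 8 * 12  = 346
 *    end        = 346 + 4 * 32  = 474
 *
 * so this configuration only fits when brw->urb.size >= 474 URB rows;
 * otherwise brw_calculate_urb_fence() below falls back to smaller or fewer
 * entries. */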
-
-/* Most minimal update, forces re-emit of URB fence packet after GS
- * unit turned on/off.
- */
-void
-brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
- unsigned vsize, unsigned sfsize)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (csize < limits[CS].min_entry_size)
- csize = limits[CS].min_entry_size;
-
- if (vsize < limits[VS].min_entry_size)
- vsize = limits[VS].min_entry_size;
-
- if (sfsize < limits[SF].min_entry_size)
- sfsize = limits[SF].min_entry_size;
-
- if (brw->urb.vsize < vsize ||
- brw->urb.sfsize < sfsize ||
- brw->urb.csize < csize ||
- (brw->urb.constrained && (brw->urb.vsize > vsize ||
- brw->urb.sfsize > sfsize ||
- brw->urb.csize > csize))) {
-
-
- brw->urb.csize = csize;
- brw->urb.sfsize = sfsize;
- brw->urb.vsize = vsize;
-
- brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
- brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
- brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
- brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
- brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
-
- brw->urb.constrained = 0;
-
- if (devinfo->ver == 5) {
- brw->urb.nr_vs_entries = 128;
- brw->urb.nr_sf_entries = 48;
- if (check_urb_layout(brw)) {
- goto done;
- } else {
- brw->urb.constrained = 1;
- brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
- brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
- }
- } else if (devinfo->verx10 == 45) {
- brw->urb.nr_vs_entries = 64;
- if (check_urb_layout(brw)) {
- goto done;
- } else {
- brw->urb.constrained = 1;
- brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
- }
- }
-
- if (!check_urb_layout(brw)) {
- brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
- brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
- brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
- brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
- brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
-
- /* Mark us as operating with constrained nr_entries, so that next
- * time we recalculate we'll resize the fences in the hope of
- * escaping constrained mode and getting back to normal performance.
- */
- brw->urb.constrained = 1;
-
- if (!check_urb_layout(brw)) {
- /* This is impossible, given the maximal sizes of urb
- * entries and the values for minimum nr of entries
- * provided above.
- */
- fprintf(stderr, "couldn't calculate URB layout!\n");
- exit(1);
- }
-
- if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF))
- fprintf(stderr, "URB CONSTRAINED\n");
- }
-
-done:
- if (INTEL_DEBUG(DEBUG_URB))
- fprintf(stderr,
- "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
- brw->urb.vs_start,
- brw->urb.gs_start,
- brw->urb.clip_start,
- brw->urb.sf_start,
- brw->urb.cs_start,
- brw->urb.size);
-
- brw->ctx.NewDriverState |= BRW_NEW_URB_FENCE;
- }
-}
-
-static void recalculate_urb_fence( struct brw_context *brw )
-{
- brw_calculate_urb_fence(brw, brw->curbe.total_size,
- brw_vue_prog_data(brw->vs.base.prog_data)->urb_entry_size,
- brw->sf.prog_data->urb_entry_size);
-}
-
-
-const struct brw_tracked_state brw_recalculate_urb_fence = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_SF_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = recalculate_urb_fence
-};
-
-
-
-
-
-void brw_upload_urb_fence(struct brw_context *brw)
-{
- struct brw_urb_fence uf;
- memset(&uf, 0, sizeof(uf));
-
- uf.header.opcode = CMD_URB_FENCE;
- uf.header.length = sizeof(uf)/4-2;
- uf.header.vs_realloc = 1;
- uf.header.gs_realloc = 1;
- uf.header.clp_realloc = 1;
- uf.header.sf_realloc = 1;
- uf.header.vfe_realloc = 1;
- uf.header.cs_realloc = 1;
-
- /* The ordering below is correct, even if it does not match the
- * layout in the instruction.
- *
- * There are 256/384 urb reg pairs in total.
- */
- uf.bits0.vs_fence = brw->urb.gs_start;
- uf.bits0.gs_fence = brw->urb.clip_start;
- uf.bits0.clp_fence = brw->urb.sf_start;
- uf.bits1.sf_fence = brw->urb.cs_start;
- uf.bits1.cs_fence = brw->urb.size;
-
- /* erratum: URB_FENCE must not cross a 64-byte cacheline */
- if ((USED_BATCH(brw->batch) & 15) > 12) {
- int pad = 16 - (USED_BATCH(brw->batch) & 15);
- do
- *brw->batch.map_next++ = MI_NOOP;
- while (--pad);
- }
-
- brw_batch_data(brw, &uf, sizeof(uf));
-}
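/* Example of the erratum workaround above: URB_FENCE is three DWords here and
 * a 64-byte cacheline holds 16 DWords.  If the batch is, say, 14 DWords into
 * the current cacheline (USED_BATCH & 15 == 14), the packet would straddle the
 * boundary, so two MI_NOOPs are emitted first and the fence starts at the next
 * cacheline.  (Numbers are illustrative.) */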
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#include "brw_util.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-
-GLuint brw_translate_blend_equation( GLenum mode )
-{
- switch (mode) {
- case GL_FUNC_ADD:
- return BRW_BLENDFUNCTION_ADD;
- case GL_MIN:
- return BRW_BLENDFUNCTION_MIN;
- case GL_MAX:
- return BRW_BLENDFUNCTION_MAX;
- case GL_FUNC_SUBTRACT:
- return BRW_BLENDFUNCTION_SUBTRACT;
- case GL_FUNC_REVERSE_SUBTRACT:
- return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
- default:
- unreachable("not reached");
- }
-}
-
-GLuint brw_translate_blend_factor( GLenum factor )
-{
- switch(factor) {
- case GL_ZERO:
- return BRW_BLENDFACTOR_ZERO;
- case GL_SRC_ALPHA:
- return BRW_BLENDFACTOR_SRC_ALPHA;
- case GL_ONE:
- return BRW_BLENDFACTOR_ONE;
- case GL_SRC_COLOR:
- return BRW_BLENDFACTOR_SRC_COLOR;
- case GL_ONE_MINUS_SRC_COLOR:
- return BRW_BLENDFACTOR_INV_SRC_COLOR;
- case GL_DST_COLOR:
- return BRW_BLENDFACTOR_DST_COLOR;
- case GL_ONE_MINUS_DST_COLOR:
- return BRW_BLENDFACTOR_INV_DST_COLOR;
- case GL_ONE_MINUS_SRC_ALPHA:
- return BRW_BLENDFACTOR_INV_SRC_ALPHA;
- case GL_DST_ALPHA:
- return BRW_BLENDFACTOR_DST_ALPHA;
- case GL_ONE_MINUS_DST_ALPHA:
- return BRW_BLENDFACTOR_INV_DST_ALPHA;
- case GL_SRC_ALPHA_SATURATE:
- return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
- case GL_CONSTANT_COLOR:
- return BRW_BLENDFACTOR_CONST_COLOR;
- case GL_ONE_MINUS_CONSTANT_COLOR:
- return BRW_BLENDFACTOR_INV_CONST_COLOR;
- case GL_CONSTANT_ALPHA:
- return BRW_BLENDFACTOR_CONST_ALPHA;
- case GL_ONE_MINUS_CONSTANT_ALPHA:
- return BRW_BLENDFACTOR_INV_CONST_ALPHA;
-
- case GL_SRC1_COLOR:
- return BRW_BLENDFACTOR_SRC1_COLOR;
- case GL_SRC1_ALPHA:
- return BRW_BLENDFACTOR_SRC1_ALPHA;
- case GL_ONE_MINUS_SRC1_COLOR:
- return BRW_BLENDFACTOR_INV_SRC1_COLOR;
- case GL_ONE_MINUS_SRC1_ALPHA:
- return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
-
- default:
- unreachable("not reached");
- }
-}
-
-static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
- [GL_POINTS] = _3DPRIM_POINTLIST,
- [GL_LINES] = _3DPRIM_LINELIST,
- [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
- [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
- [GL_TRIANGLES] = _3DPRIM_TRILIST,
- [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
- [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
- [GL_QUADS] = _3DPRIM_QUADLIST,
- [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
- [GL_POLYGON] = _3DPRIM_POLYGON,
- [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
- [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
- [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
- [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
-};
-
-uint32_t
-get_hw_prim_for_gl_prim(int mode)
-{
- assert(mode < ARRAY_SIZE(prim_to_hw_prim));
- return prim_to_hw_prim[mode];
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRW_UTIL_H
-#define BRW_UTIL_H
-
-#include "brw_context.h"
-#include "main/framebuffer.h"
-
-extern GLuint brw_translate_blend_factor( GLenum factor );
-extern GLuint brw_translate_blend_equation( GLenum mode );
-
-static inline float
-brw_get_line_width(struct brw_context *brw)
-{
- /* From the OpenGL 4.4 spec:
- *
- * "The actual width of non-antialiased lines is determined by rounding
- * the supplied width to the nearest integer, then clamping it to the
- * implementation-dependent maximum non-antialiased line width."
- */
- float line_width =
- CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag
- ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
- 0.125f, brw->ctx.Const.MaxLineWidth);
-
- if (!_mesa_is_multisample_enabled(&brw->ctx) && brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
- /* For 1 pixel line thickness or less, the general
- * anti-aliasing algorithm gives up, and a garbage line is
- * generated. Setting a Line Width of 0.0 specifies the
- * rasterization of the "thinnest" (one-pixel-wide),
- * non-antialiased lines.
- *
- * Lines rendered with zero Line Width are rasterized using
- * Grid Intersection Quantization rules as specified by
- * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
- * Rasterization.
- */
- line_width = 0.0f;
- }
-
- return line_width;
-}
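/* Two quick examples of the rules above (illustrative): with multisampling and
 * line smoothing both off, Line.Width = 3.4 is rounded to 3.0 and then clamped;
 * with smoothing on and Line.Width = 1.2 (< 1.5), the width is forced to 0.0 so
 * the hardware falls back to its one-pixel "cosmetic" line rasterization. */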
-
-#endif
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#include "util/compiler.h"
-#include "main/context.h"
-#include "brw_context.h"
-#include "brw_vs.h"
-#include "brw_util.h"
-#include "brw_state.h"
-#include "program/prog_print.h"
-#include "program/prog_parameter.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-
-#include "util/ralloc.h"
-
-/**
- * Decide which set of clip planes should be used when clipping via
- * gl_Position or gl_ClipVertex.
- */
-gl_clip_plane *
-brw_select_clip_planes(struct gl_context *ctx)
-{
- if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
- /* There is currently a GLSL vertex shader, so clip according to GLSL
- * rules, which means compare gl_ClipVertex (or gl_Position, if
- * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
- * that were stored in EyeUserPlane at the time the clip planes were
- * specified.
- */
- return ctx->Transform.EyeUserPlane;
- } else {
- /* Either we are using fixed function or an ARB vertex program. In
- * either case the clip planes are going to be compared against
- * gl_Position (which is in clip coordinates) so we have to clip using
- * _ClipUserPlane, which was transformed into clip coordinates by Mesa
- * core.
- */
- return ctx->Transform._ClipUserPlane;
- }
-}
-
-static GLbitfield64
-brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key,
- GLbitfield64 user_varyings)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- GLbitfield64 outputs_written = user_varyings;
-
- if (devinfo->ver < 6) {
- /* Put dummy slots into the VUE for the SF to put the replaced
- * point sprite coords in. We shouldn't need these dummy slots,
- * which take up precious URB space, but it would mean that the SF
- * doesn't get nice aligned pairs of input coords into output
- * coords, which would be a pain to handle.
- */
- for (unsigned i = 0; i < 8; i++) {
- if (key->point_coord_replace & (1 << i))
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
- }
-
- /* if back colors are written, allocate slots for front colors too */
- if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
- if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
- }
-
- /* In order for legacy clipping to work, we need to populate the clip
- * distance varying slots whenever clipping is enabled, even if the vertex
- * shader doesn't write to gl_ClipDistance.
- */
- if (key->nr_userclip_plane_consts > 0) {
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
- }
-
- return outputs_written;
-}
-
-static bool
-brw_codegen_vs_prog(struct brw_context *brw,
- struct brw_program *vp,
- struct brw_vs_prog_key *key)
-{
- const struct brw_compiler *compiler = brw->screen->compiler;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const GLuint *program;
- struct brw_vs_prog_data prog_data;
- struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
- void *mem_ctx;
- bool start_busy = false;
- double start_time = 0;
-
- memset(&prog_data, 0, sizeof(prog_data));
-
- /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
- if (vp->program.info.is_arb_asm)
- stage_prog_data->use_alt_mode = true;
-
- mem_ctx = ralloc_context(NULL);
-
- nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir);
-
- brw_assign_common_binding_table_offsets(devinfo, &vp->program,
- &prog_data.base.base, 0);
-
- if (!vp->program.info.is_arb_asm) {
- brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program,
- &prog_data.base.base,
- compiler->scalar_stage[MESA_SHADER_VERTEX]);
- if (brw->can_push_ubos) {
- brw_nir_analyze_ubo_ranges(compiler, nir, key,
- prog_data.base.base.ubo_ranges);
- }
- } else {
- brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program,
- &prog_data.base.base);
- }
-
- if (key->nr_userclip_plane_consts > 0) {
- brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts,
- &prog_data.base.base);
- }
-
- if (key->copy_edgeflag)
- nir_lower_passthrough_edgeflags(nir);
-
- uint64_t outputs_written =
- brw_vs_outputs_written(brw, key, nir->info.outputs_written);
-
- brw_compute_vue_map(devinfo,
- &prog_data.base.vue_map, outputs_written,
- nir->info.separate_shader, 1);
-
- if (0) {
- _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
- }
-
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- brw_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
- if (INTEL_DEBUG(DEBUG_VS)) {
- if (vp->program.info.is_arb_asm)
- brw_dump_arb_asm("vertex", &vp->program);
- }
-
-
- /* Emit GFX4 code.
- */
- struct brw_compile_vs_params params = {
- .nir = nir,
- .key = key,
- .prog_data = &prog_data,
- .log_data = brw,
- };
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- params.shader_time = true;
- params.shader_time_index =
- brw_get_shader_time_index(brw, &vp->program, ST_VS,
- !vp->program.info.is_arb_asm);
- }
-
- program = brw_compile_vs(compiler, mem_ctx, &params);
- if (program == NULL) {
- if (!vp->program.info.is_arb_asm) {
- vp->program.sh.data->LinkStatus = LINKING_FAILURE;
- ralloc_strcat(&vp->program.sh.data->InfoLog, params.error_str);
- }
-
- _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", params.error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- if (unlikely(brw->perf_debug)) {
- if (vp->compiled_once) {
- brw_debug_recompile(brw, MESA_SHADER_VERTEX, vp->program.Id,
- &key->base);
- }
- if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
- perf_debug("VS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- vp->compiled_once = true;
- }
-
- /* Scratch space is used for register spilling */
- brw_alloc_stage_scratch(brw, &brw->vs.base,
- prog_data.base.base.total_scratch);
-
- /* The param and pull_param arrays will be freed by the shader cache. */
- ralloc_steal(NULL, prog_data.base.base.param);
- ralloc_steal(NULL, prog_data.base.base.pull_param);
- brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
- key, sizeof(struct brw_vs_prog_key),
- program, prog_data.base.base.program_size,
- &prog_data, sizeof(prog_data),
- &brw->vs.base.prog_offset, &brw->vs.base.prog_data);
- ralloc_free(mem_ctx);
-
- return true;
-}
-
-static bool
-brw_vs_state_dirty(const struct brw_context *brw)
-{
- return brw_state_dirty(brw,
- _NEW_BUFFERS |
- _NEW_LIGHT |
- _NEW_POINT |
- _NEW_POLYGON |
- _NEW_TEXTURE |
- _NEW_TRANSFORM,
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VS_ATTRIB_WORKAROUNDS);
-}
-
-void
-brw_vs_populate_key(struct brw_context *brw,
- struct brw_vs_prog_key *key)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *prog = brw->programs[MESA_SHADER_VERTEX];
- struct brw_program *vp = (struct brw_program *) prog;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- memset(key, 0, sizeof(*key));
-
- /* Just upload the program verbatim for now. Always send it all
- * the inputs it asks for, whether they are varying or not.
- */
-
- /* _NEW_TEXTURE */
- brw_populate_base_prog_key(ctx, vp, &key->base);
-
- if (ctx->Transform.ClipPlanesEnabled != 0 &&
- (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) &&
- vp->program.info.clip_distance_array_size == 0) {
- key->nr_userclip_plane_consts =
- util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
- }
-
- if (devinfo->ver < 6) {
- /* _NEW_POLYGON */
- key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL);
-
- /* _NEW_POINT */
- if (ctx->Point.PointSprite) {
- key->point_coord_replace = ctx->Point.CoordReplace & 0xff;
- }
- }
-
- if (prog->info.outputs_written &
- (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
- VARYING_BIT_BFC1)) {
- /* _NEW_LIGHT | _NEW_BUFFERS */
- key->clamp_vertex_color = ctx->Light._ClampVertexColor;
- }
-
- /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
- if (devinfo->verx10 <= 70) {
- memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
- sizeof(brw->vb.attrib_wa_flags));
- }
-}
-
-void
-brw_upload_vs_prog(struct brw_context *brw)
-{
- struct brw_vs_prog_key key;
- /* BRW_NEW_VERTEX_PROGRAM */
- struct brw_program *vp =
- (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
-
- if (!brw_vs_state_dirty(brw))
- return;
-
- brw_vs_populate_key(brw, &key);
-
- if (brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key),
- &brw->vs.base.prog_offset, &brw->vs.base.prog_data,
- true))
- return;
-
- if (brw_disk_cache_upload_program(brw, MESA_SHADER_VERTEX))
- return;
-
- vp = (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
- vp->id = key.base.program_string_id;
-
- ASSERTED bool success = brw_codegen_vs_prog(brw, vp, &key);
- assert(success);
-}
-
-void
-brw_vs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_vs_prog_key *key,
- struct gl_program *prog)
-{
- const struct intel_device_info *devinfo = compiler->devinfo;
- struct brw_program *bvp = brw_program(prog);
-
- memset(key, 0, sizeof(*key));
-
- brw_populate_default_base_prog_key(devinfo, bvp, &key->base);
-
- key->clamp_vertex_color =
- (prog->info.outputs_written &
- (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
- VARYING_BIT_BFC1));
-}
-
-bool
-brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_vs_prog_key key;
- uint32_t old_prog_offset = brw->vs.base.prog_offset;
- struct brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data;
- bool success;
-
- struct brw_program *bvp = brw_program(prog);
-
- brw_vs_populate_default_key(brw->screen->compiler, &key, prog);
-
- success = brw_codegen_vs_prog(brw, bvp, &key);
-
- brw->vs.base.prog_offset = old_prog_offset;
- brw->vs.base.prog_data = old_prog_data;
-
- return success;
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRW_VS_H
-#define BRW_VS_H
-
-#include "brw_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void
-brw_upload_vs_prog(struct brw_context *brw);
-
-void
-brw_vs_populate_key(struct brw_context *brw,
- struct brw_vs_prog_key *key);
-void
-brw_vs_populate_default_key(const struct brw_compiler *compiler,
- struct brw_vs_prog_key *key,
- struct gl_program *prog);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_buffer_objects.h"
-
-
-/* Creates a new VS constant buffer reflecting the current VS program's
- * constants, if needed by the VS program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_vs_pull_constants(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->vs.base;
-
- /* BRW_NEW_VERTEX_PROGRAM */
- struct brw_program *vp =
- (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
-
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
- /* _NEW_PROGRAM_CONSTANTS */
- brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program,
- stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_vs_pull_constants = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_vs_pull_constants,
-};
-
-static void
-brw_upload_vs_ubo_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* _NEW_PROGRAM */
- struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
-
- /* BRW_NEW_VS_PROG_DATA */
- brw_upload_ubo_surfaces(brw, prog, &brw->vs.base, brw->vs.base.prog_data);
-}
-
-const struct brw_tracked_state brw_vs_ubo_surfaces = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_UNIFORM_BUFFER |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_vs_ubo_surfaces,
-};
-
-static void
-brw_upload_vs_image_surfaces(struct brw_context *brw)
-{
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
-
- if (vp) {
- /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
- brw_upload_image_surfaces(brw, vp, &brw->vs.base,
- brw->vs.base.prog_data);
- }
-}
-
-const struct brw_tracked_state brw_vs_image_surfaces = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_IMAGE_UNITS |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_vs_image_surfaces,
-};
+++ /dev/null
-/*
- * Copyright (C) Intel Corp. 2006. All Rights Reserved.
- * Intel funded Tungsten Graphics to
- * develop this 3D driver.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "brw_context.h"
-#include "brw_wm.h"
-#include "brw_state.h"
-#include "main/enums.h"
-#include "main/formats.h"
-#include "main/fbobject.h"
-#include "main/samplerobj.h"
-#include "main/framebuffer.h"
-#include "program/prog_parameter.h"
-#include "program/program.h"
-#include "brw_mipmap_tree.h"
-#include "brw_image.h"
-#include "brw_fbo.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-
-#include "util/ralloc.h"
-#include "util/u_math.h"
-
-static void
-assign_fs_binding_table_offsets(const struct intel_device_info *devinfo,
- const struct gl_program *prog,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data)
-{
- /* Render targets implicitly start at surface index 0. Even if there are
- * no color regions, we still perform an FB write to a null render target,
- * which will be surface 0.
- */
- uint32_t next_binding_table_offset = MAX2(key->nr_color_regions, 1);
-
- next_binding_table_offset =
- brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
- next_binding_table_offset);
-
- if (prog->nir->info.outputs_read && !key->coherent_fb_fetch) {
- prog_data->binding_table.render_target_read_start =
- next_binding_table_offset;
- next_binding_table_offset += key->nr_color_regions;
- }
-
- /* Update the binding table size */
- prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4;
-}
-
-static bool
-brw_codegen_wm_prog(struct brw_context *brw,
- struct brw_program *fp,
- struct brw_wm_prog_key *key,
- struct brw_vue_map *vue_map)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- void *mem_ctx = ralloc_context(NULL);
- struct brw_wm_prog_data prog_data;
- const GLuint *program;
- bool start_busy = false;
- double start_time = 0;
-
- nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir);
-
- memset(&prog_data, 0, sizeof(prog_data));
-
- /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
- if (fp->program.info.is_arb_asm)
- prog_data.base.use_alt_mode = true;
-
- assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);
-
- if (!fp->program.info.is_arb_asm) {
- brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program,
- &prog_data.base, true);
- if (brw->can_push_ubos) {
- brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir,
- NULL, prog_data.base.ubo_ranges);
- }
- } else {
- brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base);
-
- if (INTEL_DEBUG(DEBUG_WM))
- brw_dump_arb_asm("fragment", &fp->program);
- }
-
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- brw_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
- struct brw_compile_fs_params params = {
- .nir = nir,
- .key = key,
- .prog_data = &prog_data,
-
- .allow_spilling = true,
- .vue_map = vue_map,
-
- .log_data = brw,
- };
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- params.shader_time = true;
- params.shader_time_index8 =
- brw_get_shader_time_index(brw, &fp->program, ST_FS8,
- !fp->program.info.is_arb_asm);
- params.shader_time_index16 =
- brw_get_shader_time_index(brw, &fp->program, ST_FS16,
- !fp->program.info.is_arb_asm);
- params.shader_time_index32 =
- brw_get_shader_time_index(brw, &fp->program, ST_FS32,
- !fp->program.info.is_arb_asm);
- }
-
- program = brw_compile_fs(brw->screen->compiler, mem_ctx, &params);
-
- if (program == NULL) {
- if (!fp->program.info.is_arb_asm) {
- fp->program.sh.data->LinkStatus = LINKING_FAILURE;
- ralloc_strcat(&fp->program.sh.data->InfoLog, params.error_str);
- }
-
- _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", params.error_str);
-
- ralloc_free(mem_ctx);
- return false;
- }
-
- if (unlikely(brw->perf_debug)) {
- if (fp->compiled_once) {
- brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id,
- &key->base);
- }
- fp->compiled_once = true;
-
- if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
- perf_debug("FS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- }
-
- brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
-
- if (INTEL_DEBUG(DEBUG_WM) && fp->program.info.is_arb_asm)
- fprintf(stderr, "\n");
-
- /* The param and pull_param arrays will be freed by the shader cache. */
- ralloc_steal(NULL, prog_data.base.param);
- ralloc_steal(NULL, prog_data.base.pull_param);
- brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
- key, sizeof(struct brw_wm_prog_key),
- program, prog_data.base.program_size,
- &prog_data, sizeof(prog_data),
- &brw->wm.base.prog_offset, &brw->wm.base.prog_data);
-
- ralloc_free(mem_ctx);
-
- return true;
-}
-
-static uint8_t
-gfx6_gather_workaround(GLenum internalformat)
-{
- switch (internalformat) {
- case GL_R8I: return WA_SIGN | WA_8BIT;
- case GL_R8UI: return WA_8BIT;
- case GL_R16I: return WA_SIGN | WA_16BIT;
- case GL_R16UI: return WA_16BIT;
- default:
- /* Note that even though GL_R32I and GL_R32UI have format overrides in
- * the surface state, there is no shader w/a required.
- */
- return 0;
- }
-}
-
-static void
-brw_populate_sampler_prog_key_data(struct gl_context *ctx,
- const struct gl_program *prog,
- struct brw_sampler_prog_key_data *key)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- GLbitfield mask = prog->SamplersUsed;
-
- while (mask) {
- const int s = u_bit_scan(&mask);
-
- key->swizzles[s] = SWIZZLE_NOOP;
- key->scale_factors[s] = 0.0f;
-
- int unit_id = prog->SamplerUnits[s];
- const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id];
-
- if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) {
- const struct gl_texture_object *t = unit->_Current;
- const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel];
- struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id);
-
- const bool alpha_depth = t->Attrib.DepthMode == GL_ALPHA &&
- (img->_BaseFormat == GL_DEPTH_COMPONENT ||
- img->_BaseFormat == GL_DEPTH_STENCIL);
-
- /* Haswell handles texture swizzling as surface format overrides
- * (except for GL_ALPHA); all other platforms need MOVs in the shader.
- */
- if (alpha_depth || (devinfo->verx10 <= 70))
- key->swizzles[s] = brw_get_texture_swizzle(ctx, t);
-
- if (devinfo->ver < 8 &&
- sampler->Attrib.MinFilter != GL_NEAREST &&
- sampler->Attrib.MagFilter != GL_NEAREST) {
- if (sampler->Attrib.WrapS == GL_CLAMP)
- key->gl_clamp_mask[0] |= 1 << s;
- if (sampler->Attrib.WrapT == GL_CLAMP)
- key->gl_clamp_mask[1] |= 1 << s;
- if (sampler->Attrib.WrapR == GL_CLAMP)
- key->gl_clamp_mask[2] |= 1 << s;
- }
-
- /* gather4 for RG32* is broken in multiple ways on Gfx7. */
- if (devinfo->ver == 7 && prog->info.uses_texture_gather) {
- switch (img->InternalFormat) {
- case GL_RG32I:
- case GL_RG32UI: {
- /* We have to override the format to R32G32_FLOAT_LD.
- * This means that SCS_ALPHA and SCS_ONE will return 0x3f8
- * (1.0) rather than integer 1. This needs shader hacks.
- *
- * On Ivybridge, we whack W (alpha) to ONE in our key's
- * swizzle. On Haswell, we look at the original texture
- * swizzle, and use XYZW with channels overridden to ONE,
- * leaving normal texture swizzling to SCS.
- */
- unsigned src_swizzle =
- devinfo->platform == INTEL_PLATFORM_HSW ?
- t->Attrib._Swizzle : key->swizzles[s];
- for (int i = 0; i < 4; i++) {
- unsigned src_comp = GET_SWZ(src_swizzle, i);
- if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
- key->swizzles[i] &= ~(0x7 << (3 * i));
- key->swizzles[i] |= SWIZZLE_ONE << (3 * i);
- }
- }
- }
- FALLTHROUGH;
- case GL_RG32F:
- /* The channel select for green doesn't work - we have to
- * request blue. Haswell can use SCS for this, but Ivybridge
- * needs a shader workaround.
- */
- if (devinfo->platform != INTEL_PLATFORM_HSW)
- key->gather_channel_quirk_mask |= 1 << s;
- break;
- }
- }
-
- /* Gfx6's gather4 is broken for UINT/SINT; we treat them as
- * UNORM/FLOAT instead and fix it in the shader.
- */
- if (devinfo->ver == 6 && prog->info.uses_texture_gather) {
- key->gfx6_gather_wa[s] = gfx6_gather_workaround(img->InternalFormat);
- }
-
- /* If this is a multisample sampler, and uses the CMS MSAA layout,
- * then we need to emit slightly different code to first sample the
- * MCS surface.
- */
- struct brw_texture_object *intel_tex =
- brw_texture_object((struct gl_texture_object *)t);
-
- /* From gfx9 onwards some single sampled buffers can also be
- * compressed. These don't need ld2dms sampling along with mcs fetch.
- */
- if (intel_tex->mt->aux_usage == ISL_AUX_USAGE_MCS) {
- assert(devinfo->ver >= 7);
- assert(intel_tex->mt->surf.samples > 1);
- assert(intel_tex->mt->aux_buf);
- assert(intel_tex->mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
- key->compressed_multisample_layout_mask |= 1 << s;
-
- if (intel_tex->mt->surf.samples >= 16) {
- assert(devinfo->ver >= 9);
- key->msaa_16 |= 1 << s;
- }
- }
-
- if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) {
-
- /* Setup possible scaling factor. */
- key->scale_factors[s] = intel_tex->planar_format->scaling_factor;
-
- switch (intel_tex->planar_format->components) {
- case __DRI_IMAGE_COMPONENTS_Y_UV:
- key->y_uv_image_mask |= 1 << s;
- break;
- case __DRI_IMAGE_COMPONENTS_Y_U_V:
- key->y_u_v_image_mask |= 1 << s;
- break;
- case __DRI_IMAGE_COMPONENTS_Y_XUXV:
- key->yx_xuxv_image_mask |= 1 << s;
- break;
- case __DRI_IMAGE_COMPONENTS_Y_UXVX:
- key->xy_uxvx_image_mask |= 1 << s;
- break;
- case __DRI_IMAGE_COMPONENTS_AYUV:
- key->ayuv_image_mask |= 1 << s;
- break;
- case __DRI_IMAGE_COMPONENTS_XYUV:
- key->xyuv_image_mask |= 1 << s;
- break;
- default:
- break;
- }
-
- switch (intel_tex->yuv_color_space) {
- case __DRI_YUV_COLOR_SPACE_ITU_REC709:
- key->bt709_mask |= 1 << s;
- break;
- case __DRI_YUV_COLOR_SPACE_ITU_REC2020:
- key->bt2020_mask |= 1 << s;
- break;
- default:
- break;
- }
- }
-
- }
- }
-}
-
-void
-brw_populate_base_prog_key(struct gl_context *ctx,
- const struct brw_program *prog,
- struct brw_base_prog_key *key)
-{
- key->program_string_id = prog->id;
- key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
- brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex);
-}
-
-void
-brw_populate_default_base_prog_key(const struct intel_device_info *devinfo,
- const struct brw_program *prog,
- struct brw_base_prog_key *key)
-{
- key->program_string_id = prog->id;
- key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
- brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program);
-}
-
-static bool
-brw_wm_state_dirty(const struct brw_context *brw)
-{
- return brw_state_dirty(brw,
- _NEW_BUFFERS |
- _NEW_COLOR |
- _NEW_DEPTH |
- _NEW_FRAG_CLAMP |
- _NEW_HINT |
- _NEW_LIGHT |
- _NEW_LINE |
- _NEW_MULTISAMPLE |
- _NEW_POLYGON |
- _NEW_STENCIL |
- _NEW_TEXTURE,
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_REDUCED_PRIMITIVE |
- BRW_NEW_STATS_WM |
- BRW_NEW_VUE_MAP_GEOM_OUT);
-}
-
-void
-brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- const struct gl_program *prog = brw->programs[MESA_SHADER_FRAGMENT];
- const struct brw_program *fp = brw_program_const(prog);
- GLuint lookup = 0;
- GLuint line_aa;
-
- memset(key, 0, sizeof(*key));
-
- /* Build the index for table lookup
- */
- if (devinfo->ver < 6) {
- struct brw_renderbuffer *depth_irb =
- brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-
- /* _NEW_COLOR */
- if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) {
- lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
- }
-
- if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;
- }
-
- /* _NEW_DEPTH */
- if (depth_irb && ctx->Depth.Test) {
- lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;
-
- if (brw_depth_writes_enabled(brw))
- lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
- }
-
- /* _NEW_STENCIL | _NEW_BUFFERS */
- if (brw->stencil_enabled) {
- lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT;
-
- if (ctx->Stencil.WriteMask[0] ||
- ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
- lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT;
- }
- key->iz_lookup = lookup;
- }
-
- line_aa = BRW_WM_AA_NEVER;
-
- /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
- if (ctx->Line.SmoothFlag) {
- if (brw->reduced_primitive == GL_LINES) {
- line_aa = BRW_WM_AA_ALWAYS;
- }
- else if (brw->reduced_primitive == GL_TRIANGLES) {
- if (ctx->Polygon.FrontMode == GL_LINE) {
- line_aa = BRW_WM_AA_SOMETIMES;
-
- if (ctx->Polygon.BackMode == GL_LINE ||
- (ctx->Polygon.CullFlag &&
- ctx->Polygon.CullFaceMode == GL_BACK))
- line_aa = BRW_WM_AA_ALWAYS;
- }
- else if (ctx->Polygon.BackMode == GL_LINE) {
- line_aa = BRW_WM_AA_SOMETIMES;
-
- if ((ctx->Polygon.CullFlag &&
- ctx->Polygon.CullFaceMode == GL_FRONT))
- line_aa = BRW_WM_AA_ALWAYS;
- }
- }
- }
-
- key->line_aa = line_aa;
-
- /* _NEW_HINT */
- key->high_quality_derivatives =
- prog->info.uses_fddx_fddy &&
- ctx->Hint.FragmentShaderDerivative == GL_NICEST;
-
- if (devinfo->ver < 6)
- key->stats_wm = brw->stats_wm;
-
- /* _NEW_LIGHT */
- key->flat_shade =
- (prog->info.inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)) &&
- (ctx->Light.ShadeModel == GL_FLAT);
-
- /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
- key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
-
- /* _NEW_TEXTURE */
- brw_populate_base_prog_key(ctx, fp, &key->base);
-
- /* _NEW_BUFFERS */
- key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
-
- /* _NEW_COLOR */
- key->force_dual_color_blend = brw->dual_color_blend_by_location &&
- (ctx->Color.BlendEnabled & 1) && ctx->Color._BlendUsesDualSrc & 0x1;
-
- /* _NEW_MULTISAMPLE, _NEW_BUFFERS */
- key->alpha_to_coverage = _mesa_is_alpha_to_coverage_enabled(ctx);
-
- /* _NEW_COLOR, _NEW_BUFFERS */
- key->alpha_test_replicate_alpha =
- ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
- _mesa_is_alpha_test_enabled(ctx);
-
- /* _NEW_BUFFERS _NEW_MULTISAMPLE */
- /* Ignore sample qualifier while computing this flag. */
- if (ctx->Multisample.Enabled) {
- key->persample_interp =
- ctx->Multisample.SampleShading &&
- (ctx->Multisample.MinSampleShadingValue *
- _mesa_geometric_samples(ctx->DrawBuffer) > 1);
-
- key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- }
-
- key->ignore_sample_mask_out = !key->multisample_fbo;
-
- /* BRW_NEW_VUE_MAP_GEOM_OUT */
- if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read &
- BRW_FS_VARYING_INPUT_MASK) > 16) {
- key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
- }
-
- /* _NEW_COLOR | _NEW_BUFFERS */
- /* Pre-gfx6, the hardware alpha test always used each render
- * target's alpha to do alpha test, as opposed to render target 0's alpha
- * like GL requires. Fix that by building the alpha test into the
- * shader, and we'll skip enabling the fixed function alpha test.
- */
- if (devinfo->ver < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
- ctx->Color.AlphaEnabled) {
- key->alpha_test_func = ctx->Color.AlphaFunc;
- key->alpha_test_ref = ctx->Color.AlphaRef;
- }
-
- /* Whether reads from the framebuffer should behave coherently. */
- key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch;
-}
-
-void
-brw_upload_wm_prog(struct brw_context *brw)
-{
- struct brw_wm_prog_key key;
- struct brw_program *fp =
- (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];
-
- if (!brw_wm_state_dirty(brw))
- return;
-
- brw_wm_populate_key(brw, &key);
-
- if (brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, &key, sizeof(key),
- &brw->wm.base.prog_offset, &brw->wm.base.prog_data,
- true))
- return;
-
- if (brw_disk_cache_upload_program(brw, MESA_SHADER_FRAGMENT))
- return;
-
- fp = (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];
- fp->id = key.base.program_string_id;
-
- ASSERTED bool success = brw_codegen_wm_prog(brw, fp, &key,
- &brw->vue_map_geom_out);
- assert(success);
-}
-
-void
-brw_wm_populate_default_key(const struct brw_compiler *compiler,
- struct brw_wm_prog_key *key,
- struct gl_program *prog)
-{
- const struct intel_device_info *devinfo = compiler->devinfo;
-
- memset(key, 0, sizeof(*key));
-
- brw_populate_default_base_prog_key(devinfo, brw_program(prog),
- &key->base);
-
- uint64_t outputs_written = prog->info.outputs_written;
-
- if (devinfo->ver < 6) {
- if (prog->info.fs.uses_discard)
- key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
-
- if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
- key->iz_lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;
-
- /* Just assume depth testing. */
- key->iz_lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;
- key->iz_lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
- }
-
- if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read &
- BRW_FS_VARYING_INPUT_MASK) > 16) {
- key->input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS;
- }
-
- key->nr_color_regions = util_bitcount64(outputs_written &
- ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
- BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
- BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
-
- /* Whether reads from the framebuffer should behave coherently. */
- key->coherent_fb_fetch = devinfo->ver >= 9;
-}
-
-bool
-brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_wm_prog_key key;
-
- struct brw_program *bfp = brw_program(prog);
-
- brw_wm_populate_default_key(brw->screen->compiler, &key, prog);
-
- /* check brw_wm_populate_default_key coherent_fb_fetch setting */
- assert(key.coherent_fb_fetch ==
- ctx->Extensions.EXT_shader_framebuffer_fetch);
-
- uint32_t old_prog_offset = brw->wm.base.prog_offset;
- struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data;
-
- struct brw_vue_map vue_map;
- if (devinfo->ver < 6) {
- brw_compute_vue_map(&brw->screen->devinfo, &vue_map,
- prog->info.inputs_read | VARYING_BIT_POS,
- false, 1);
- }
-
- bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map);
-
- brw->wm.base.prog_offset = old_prog_offset;
- brw->wm.base.prog_data = old_prog_data;
-
- return success;
-}
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#ifndef BRW_WM_H
-#define BRW_WM_H
-
-#include <stdbool.h>
-
-#include "brw_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void
-brw_upload_wm_prog(struct brw_context *brw);
-
-void
-brw_wm_populate_key(struct brw_context *brw,
- struct brw_wm_prog_key *key);
-void
-brw_wm_populate_default_key(const struct brw_compiler *compiler,
- struct brw_wm_prog_key *key,
- struct gl_program *prog);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif
+++ /dev/null
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-
-#include "compiler/nir/nir.h"
-#include "main/context.h"
-#include "main/blend.h"
-#include "main/mtypes.h"
-#include "main/samplerobj.h"
-#include "main/shaderimage.h"
-#include "main/teximage.h"
-#include "program/prog_parameter.h"
-#include "program/prog_instruction.h"
-#include "main/framebuffer.h"
-#include "main/shaderapi.h"
-
-#include "isl/isl.h"
-
-#include "brw_mipmap_tree.h"
-#include "brw_batch.h"
-#include "brw_tex.h"
-#include "brw_fbo.h"
-#include "brw_buffer_objects.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_wm.h"
-
-static void
-get_isl_surf(struct brw_context *brw, struct brw_mipmap_tree *mt,
- GLenum target, struct isl_view *view,
- uint32_t *tile_x, uint32_t *tile_y,
- uint32_t *offset, struct isl_surf *surf)
-{
- *surf = mt->surf;
-
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const enum isl_dim_layout dim_layout =
- get_isl_dim_layout(devinfo, mt->surf.tiling, target);
-
- surf->dim = get_isl_surf_dim(target);
-
- if (surf->dim_layout == dim_layout)
- return;
-
- /* The layout of the specified texture target is not compatible with the
- * actual layout of the miptree structure in memory -- You're entering
- * dangerous territory, this can only possibly work if you only intended
- * to access a single level and slice of the texture, and the hardware
- * supports the tile offset feature in order to allow non-tile-aligned
- * base offsets, since we'll have to point the hardware to the first
- * texel of the level instead of relying on the usual base level/layer
- * controls.
- */
- assert(devinfo->has_surface_tile_offset);
- assert(view->levels == 1 && view->array_len == 1);
- assert(*tile_x == 0 && *tile_y == 0);
-
- *offset += brw_miptree_get_tile_offsets(mt, view->base_level,
- view->base_array_layer,
- tile_x, tile_y);
-
- /* Minify the logical dimensions of the texture. */
- const unsigned l = view->base_level - mt->first_level;
- surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
- surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
- minify(surf->logical_level0_px.height, l);
- surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
- minify(surf->logical_level0_px.depth, l);
-
- /* Only the base level and layer can be addressed with the overridden
- * layout.
- */
- surf->logical_level0_px.array_len = 1;
- surf->levels = 1;
- surf->dim_layout = dim_layout;
-
- /* The requested slice of the texture is now at the base level and
- * layer.
- */
- view->base_level = 0;
- view->base_array_layer = 0;
-}
-
-static void
-brw_emit_surface_state(struct brw_context *brw,
- struct brw_mipmap_tree *mt,
- GLenum target, struct isl_view view,
- enum isl_aux_usage aux_usage,
- uint32_t *surf_offset, int surf_index,
- unsigned reloc_flags)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t tile_x = mt->level[0].level_x;
- uint32_t tile_y = mt->level[0].level_y;
- uint32_t offset = mt->offset;
-
- struct isl_surf surf;
-
- get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
-
- union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
-
- struct brw_bo *aux_bo = NULL;
- struct isl_surf *aux_surf = NULL;
- uint64_t aux_offset = 0;
- struct brw_bo *clear_bo = NULL;
- uint64_t clear_offset = 0;
-
- if (aux_usage != ISL_AUX_USAGE_NONE) {
- aux_surf = &mt->aux_buf->surf;
- aux_bo = mt->aux_buf->bo;
- aux_offset = mt->aux_buf->offset;
-
- /* We only really need a clear color if we also have an auxiliary
- * surface. Without one, it does nothing.
- */
- clear_color = brw_miptree_get_clear_color(mt, &clear_bo, &clear_offset);
- }
-
- void *state = brw_state_batch(brw,
- brw->isl_dev.ss.size,
- brw->isl_dev.ss.align,
- surf_offset);
-
- isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
- .address = brw_state_reloc(&brw->batch,
- *surf_offset + brw->isl_dev.ss.addr_offset,
- mt->bo, offset, reloc_flags),
- .aux_surf = aux_surf, .aux_usage = aux_usage,
- .aux_address = aux_offset,
- .mocs = brw_mocs(&brw->isl_dev, mt->bo),
- .clear_color = clear_color,
- .use_clear_address = clear_bo != NULL,
- .clear_address = clear_offset,
- .x_offset_sa = tile_x, .y_offset_sa = tile_y);
- if (aux_surf) {
- /* On gfx7 and prior, the upper 20 bits of surface state DWORD 6 are the
- * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
- * contain other control information. Since buffer addresses are always
- * on 4k boundaries (and thus have their lower 12 bits zero), we can use
- * an ordinary reloc to do the necessary address translation.
- *
- * FIXME: move to the point of assignment.
- */
- assert((aux_offset & 0xfff) == 0);
-
- if (devinfo->ver >= 8) {
- uint64_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
- *aux_addr = brw_state_reloc(&brw->batch,
- *surf_offset +
- brw->isl_dev.ss.aux_addr_offset,
- aux_bo, *aux_addr,
- reloc_flags);
- } else {
- uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
- *aux_addr = brw_state_reloc(&brw->batch,
- *surf_offset +
- brw->isl_dev.ss.aux_addr_offset,
- aux_bo, *aux_addr,
- reloc_flags);
-
- }
- }
-
- if (clear_bo != NULL) {
- /* Make sure the offset is aligned with a cacheline. */
- assert((clear_offset & 0x3f) == 0);
- uint64_t *clear_address =
- state + brw->isl_dev.ss.clear_color_state_offset;
- *clear_address = brw_state_reloc(&brw->batch,
- *surf_offset +
- brw->isl_dev.ss.clear_color_state_offset,
- clear_bo, *clear_address, reloc_flags);
- }
-}
-
-static uint32_t
-gfx6_update_renderbuffer_surface(struct brw_context *brw,
- struct gl_renderbuffer *rb,
- unsigned unit,
- uint32_t surf_index)
-{
- struct gl_context *ctx = &brw->ctx;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- struct brw_mipmap_tree *mt = irb->mt;
-
- assert(brw_render_target_supported(brw, rb));
-
- mesa_format rb_format = _mesa_get_render_format(ctx, brw_rb_format(irb));
- if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
- _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
- __func__, _mesa_get_format_name(rb_format));
- }
- enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];
-
- struct isl_view view = {
- .format = isl_format,
- .base_level = irb->mt_level - irb->mt->first_level,
- .levels = 1,
- .base_array_layer = irb->mt_layer,
- .array_len = MAX2(irb->layer_count, 1),
- .swizzle = ISL_SWIZZLE_IDENTITY,
- .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
- };
-
- uint32_t offset;
- brw_emit_surface_state(brw, mt, mt->target, view,
- brw->draw_aux_usage[unit],
- &offset, surf_index,
- RELOC_WRITE);
- return offset;
-}
-
-GLuint
-translate_tex_target(GLenum target)
-{
- switch (target) {
- case GL_TEXTURE_1D:
- case GL_TEXTURE_1D_ARRAY_EXT:
- return BRW_SURFACE_1D;
-
- case GL_TEXTURE_RECTANGLE_NV:
- return BRW_SURFACE_2D;
-
- case GL_TEXTURE_2D:
- case GL_TEXTURE_2D_ARRAY_EXT:
- case GL_TEXTURE_EXTERNAL_OES:
- case GL_TEXTURE_2D_MULTISAMPLE:
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- return BRW_SURFACE_2D;
-
- case GL_TEXTURE_3D:
- return BRW_SURFACE_3D;
-
- case GL_TEXTURE_CUBE_MAP:
- case GL_TEXTURE_CUBE_MAP_ARRAY:
- return BRW_SURFACE_CUBE;
-
- default:
- unreachable("not reached");
- }
-}
-
-uint32_t
-brw_get_surface_tiling_bits(enum isl_tiling tiling)
-{
- switch (tiling) {
- case ISL_TILING_X:
- return BRW_SURFACE_TILED;
- case ISL_TILING_Y0:
- return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
- default:
- return 0;
- }
-}
-
-
-uint32_t
-brw_get_surface_num_multisamples(unsigned num_samples)
-{
- if (num_samples > 1)
- return BRW_SURFACE_MULTISAMPLECOUNT_4;
- else
- return BRW_SURFACE_MULTISAMPLECOUNT_1;
-}
-
-/**
- * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
- * swizzling.
- */
-int
-brw_get_texture_swizzle(const struct gl_context *ctx,
- const struct gl_texture_object *t)
-{
- const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel];
-
- int swizzles[SWIZZLE_NIL + 1] = {
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- SWIZZLE_ZERO,
- SWIZZLE_ONE,
- SWIZZLE_NIL
- };
-
- if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
- img->_BaseFormat == GL_DEPTH_STENCIL) {
- GLenum depth_mode = t->Attrib.DepthMode;
-
- /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
- * with depth component data specified with a sized internal format.
- * Otherwise, it's left at the old default, GL_LUMINANCE.
- */
- if (_mesa_is_gles3(ctx) &&
- img->InternalFormat != GL_DEPTH_COMPONENT &&
- img->InternalFormat != GL_DEPTH_STENCIL) {
- depth_mode = GL_RED;
- }
-
- switch (depth_mode) {
- case GL_ALPHA:
- swizzles[0] = SWIZZLE_ZERO;
- swizzles[1] = SWIZZLE_ZERO;
- swizzles[2] = SWIZZLE_ZERO;
- swizzles[3] = SWIZZLE_X;
- break;
- case GL_LUMINANCE:
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_X;
- swizzles[2] = SWIZZLE_X;
- swizzles[3] = SWIZZLE_ONE;
- break;
- case GL_INTENSITY:
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_X;
- swizzles[2] = SWIZZLE_X;
- swizzles[3] = SWIZZLE_X;
- break;
- case GL_RED:
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_ZERO;
- swizzles[2] = SWIZZLE_ZERO;
- swizzles[3] = SWIZZLE_ONE;
- break;
- }
- }
-
- GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
-
- /* If the texture's format is alpha-only, force R, G, and B to
- * 0.0. Similarly, if the texture's format has no alpha channel,
- * force the alpha value read to 1.0. This allows for the
- * implementation to use an RGBA texture for any of these formats
- * without leaking any unexpected values.
- */
- switch (img->_BaseFormat) {
- case GL_ALPHA:
- swizzles[0] = SWIZZLE_ZERO;
- swizzles[1] = SWIZZLE_ZERO;
- swizzles[2] = SWIZZLE_ZERO;
- break;
- case GL_LUMINANCE:
- if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_X;
- swizzles[2] = SWIZZLE_X;
- swizzles[3] = SWIZZLE_ONE;
- }
- break;
- case GL_LUMINANCE_ALPHA:
- if (datatype == GL_SIGNED_NORMALIZED) {
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_X;
- swizzles[2] = SWIZZLE_X;
- swizzles[3] = SWIZZLE_W;
- }
- break;
- case GL_INTENSITY:
- if (datatype == GL_SIGNED_NORMALIZED) {
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_X;
- swizzles[2] = SWIZZLE_X;
- swizzles[3] = SWIZZLE_X;
- }
- break;
- case GL_RED:
- if (img->TexFormat == MESA_FORMAT_R_SRGB8) {
- swizzles[0] = SWIZZLE_X;
- swizzles[1] = SWIZZLE_ZERO;
- swizzles[2] = SWIZZLE_ZERO;
- swizzles[3] = SWIZZLE_ONE;
- break;
- }
- FALLTHROUGH;
- case GL_RG:
- case GL_RGB:
- if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
- img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
- img->TexFormat == MESA_FORMAT_SRGB_DXT1)
- swizzles[3] = SWIZZLE_ONE;
- break;
- }
-
- return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->Attrib._Swizzle, 0)],
- swizzles[GET_SWZ(t->Attrib._Swizzle, 1)],
- swizzles[GET_SWZ(t->Attrib._Swizzle, 2)],
- swizzles[GET_SWZ(t->Attrib._Swizzle, 3)]);
-}
-
-/**
- * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gfx7.5+
- * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
- *
- * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
- * 0 1 2 3 4 5
- * 4 5 6 7 0 1
- * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
- *
- * which is simply adding 4 then modding by 8 (or anding with 7).
- *
- * We then may need to apply workarounds for textureGather hardware bugs.
- */
-static unsigned
-swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
-{
- unsigned scs = (swizzle + 4) & 7;
-
- return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
-}
-
-static void brw_update_texture_surface(struct gl_context *ctx,
- unsigned unit,
- uint32_t *surf_offset,
- bool for_gather,
- bool for_txf,
- uint32_t plane)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
-
- if (obj->Target == GL_TEXTURE_BUFFER) {
- brw_update_buffer_texture_surface(ctx, unit, surf_offset);
-
- } else {
- struct brw_texture_object *intel_obj = brw_texture_object(obj);
- struct brw_mipmap_tree *mt = intel_obj->mt;
-
- if (plane > 0) {
- if (mt->plane[plane - 1] == NULL)
- return;
- mt = mt->plane[plane - 1];
- }
-
- struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
- /* If this is a view with restricted NumLayers, then our effective depth
- * is not just the miptree depth.
- */
- unsigned view_num_layers;
- if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
- view_num_layers = obj->Attrib.NumLayers;
- } else {
- view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
- mt->surf.logical_level0_px.depth :
- mt->surf.logical_level0_px.array_len;
- }
-
- /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
- * texturing functions that return a float, as our code generation always
- * selects the .x channel (which would always be 0).
- */
- struct gl_texture_image *firstImage = obj->Image[0][obj->Attrib.BaseLevel];
- const bool alpha_depth = obj->Attrib.DepthMode == GL_ALPHA &&
- (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
- firstImage->_BaseFormat == GL_DEPTH_STENCIL);
- const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
- brw_get_texture_swizzle(&brw->ctx, obj));
-
- mesa_format mesa_fmt;
- if (firstImage->_BaseFormat == GL_DEPTH_STENCIL ||
- firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
- /* The format from intel_obj may be a combined depth stencil format
- * when we just want depth. Pull it from the miptree instead. This
- * is safe because texture views aren't allowed on depth/stencil.
- */
- mesa_fmt = mt->format;
- } else if (brw_miptree_has_etc_shadow(brw, mt)) {
- mesa_fmt = mt->shadow_mt->format;
- } else if (plane > 0) {
- mesa_fmt = mt->format;
- } else {
- mesa_fmt = intel_obj->_Format;
- }
- enum isl_format format = translate_tex_format(brw, mesa_fmt,
- for_txf ? GL_DECODE_EXT :
- sampler->Attrib.sRGBDecode);
-
- /* Implement gfx6 and gfx7 gather work-around */
- bool need_green_to_blue = false;
- if (for_gather) {
- if (devinfo->ver == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
- format == ISL_FORMAT_R32G32_SINT ||
- format == ISL_FORMAT_R32G32_UINT)) {
- format = ISL_FORMAT_R32G32_FLOAT_LD;
- need_green_to_blue = devinfo->platform == INTEL_PLATFORM_HSW;
- } else if (devinfo->ver == 6) {
- /* Sandybridge's gather4 message is broken for integer formats.
- * To work around this, we pretend the surface is UNORM for
- * 8 or 16-bit formats, and emit shader instructions to recover
- * the real INT/UINT value. For 32-bit formats, we pretend
- * the surface is FLOAT, and simply reinterpret the resulting
- * bits.
- */
- switch (format) {
- case ISL_FORMAT_R8_SINT:
- case ISL_FORMAT_R8_UINT:
- format = ISL_FORMAT_R8_UNORM;
- break;
-
- case ISL_FORMAT_R16_SINT:
- case ISL_FORMAT_R16_UINT:
- format = ISL_FORMAT_R16_UNORM;
- break;
-
- case ISL_FORMAT_R32_SINT:
- case ISL_FORMAT_R32_UINT:
- format = ISL_FORMAT_R32_FLOAT;
- break;
-
- default:
- break;
- }
- }
- }
-
- if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
- if (devinfo->ver <= 7) {
- assert(mt->shadow_mt && !mt->stencil_mt->shadow_needs_update);
- mt = mt->shadow_mt;
- } else {
- mt = mt->stencil_mt;
- }
- format = ISL_FORMAT_R8_UINT;
- } else if (devinfo->ver <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
- assert(mt->shadow_mt && !mt->shadow_needs_update);
- mt = mt->shadow_mt;
- format = ISL_FORMAT_R8_UINT;
- } else if (brw_miptree_needs_fake_etc(brw, mt)) {
- assert(mt->shadow_mt && !mt->shadow_needs_update);
- mt = mt->shadow_mt;
- }
-
- const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
-
- struct isl_view view = {
- .format = format,
- .base_level = obj->Attrib.MinLevel + obj->Attrib.BaseLevel,
- .levels = intel_obj->_MaxLevel - obj->Attrib.BaseLevel + 1,
- .base_array_layer = obj->Attrib.MinLayer,
- .array_len = view_num_layers,
- .swizzle = {
- .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
- .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
- .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
- .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
- },
- .usage = ISL_SURF_USAGE_TEXTURE_BIT,
- };
-
- /* On Ivy Bridge and earlier, we handle texture swizzle with shader
- * code. The actual surface swizzle should be identity.
- */
- if (devinfo->verx10 <= 70)
- view.swizzle = ISL_SWIZZLE_IDENTITY;
-
- if (obj->Target == GL_TEXTURE_CUBE_MAP ||
- obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
- view.usage |= ISL_SURF_USAGE_CUBE_BIT;
-
- enum isl_aux_usage aux_usage =
- brw_miptree_texture_aux_usage(brw, mt, format,
- brw->gfx9_astc5x5_wa_tex_mask);
-
- brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
- surf_offset, surf_index,
- 0);
- }
-}
-
-void
-brw_emit_buffer_surface_state(struct brw_context *brw,
- uint32_t *out_offset,
- struct brw_bo *bo,
- unsigned buffer_offset,
- enum isl_format format,
- unsigned buffer_size,
- unsigned pitch,
- unsigned reloc_flags)
-{
- uint32_t *dw = brw_state_batch(brw,
- brw->isl_dev.ss.size,
- brw->isl_dev.ss.align,
- out_offset);
-
- isl_buffer_fill_state(&brw->isl_dev, dw,
- .address = !bo ? buffer_offset :
- brw_state_reloc(&brw->batch,
- *out_offset + brw->isl_dev.ss.addr_offset,
- bo, buffer_offset,
- reloc_flags),
- .size_B = buffer_size,
- .format = format,
- .swizzle = ISL_SWIZZLE_IDENTITY,
- .stride_B = pitch,
- .mocs = brw_mocs(&brw->isl_dev, bo));
-}
-
-static unsigned
-buffer_texture_range_size(struct brw_context *brw,
- struct gl_texture_object *obj)
-{
- assert(obj->Target == GL_TEXTURE_BUFFER);
- const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat);
- const unsigned buffer_size = (!obj->BufferObject ? 0 :
- obj->BufferObject->Size);
- const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset);
-
- /* The ARB_texture_buffer_object specification says:
- *
- * "The number of texels in the buffer texture's texel array is given by
- *
- * floor(<buffer_size> / (<components> * sizeof(<base_type>))),
- *
- * where <buffer_size> is the size of the buffer object, in basic
- * machine units and <components> and <base_type> are the element count
- * and base data type for elements, as specified in Table X.1. The
- * number of texels in the texel array is then clamped to the
- * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
- *
- * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
- * so that when ISL divides by stride to obtain the number of texels, that
- * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
- */
- return MIN3((unsigned)obj->BufferSize,
- buffer_size - buffer_offset,
- brw->ctx.Const.MaxTextureBufferSize * texel_size);
-}
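/* Illustrative sketch, not driver code: the byte-level clamping performed by
 * buffer_texture_range_size() above, worked with made-up numbers.  The helper
 * name and the example limit of 65536 texels are hypothetical.
 */
static unsigned
example_buffer_texture_range_size(unsigned buffer_bytes,
                                  unsigned requested_bytes,
                                  unsigned max_texels,
                                  unsigned texel_size)
{
   /* Clamp in bytes first; when the size is later divided by the texel
    * stride, the resulting texel count can never exceed max_texels.
    */
   const unsigned limit_bytes = max_texels * texel_size;
   unsigned size = requested_bytes < buffer_bytes ? requested_bytes
                                                  : buffer_bytes;
   return size < limit_bytes ? size : limit_bytes;
   /* e.g. a 2 MiB buffer of RGBA32F texels (16 bytes each) with a
    * 65536-texel limit clamps to 1 MiB, i.e. exactly 65536 texels.
    */
}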
-
-static void
-emit_null_surface_state(struct brw_context *brw,
- const struct gl_framebuffer *fb,
- uint32_t *out_offset);
-
-void
-brw_update_buffer_texture_surface(struct gl_context *ctx,
- unsigned unit,
- uint32_t *surf_offset)
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
- struct brw_buffer_object *intel_obj =
- brw_buffer_object(tObj->BufferObject);
- const unsigned size = buffer_texture_range_size(brw, tObj);
- struct brw_bo *bo = NULL;
- mesa_format format = tObj->_BufferObjectFormat;
- const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
- int texel_size = _mesa_get_format_bytes(format);
-
- if (tObj->BufferObject == NULL) {
- emit_null_surface_state(brw, NULL, surf_offset);
- return;
- }
-
- if (intel_obj)
- bo = brw_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
- false);
-
- if (isl_format == ISL_FORMAT_UNSUPPORTED) {
- _mesa_problem(NULL, "bad format %s for texture buffer\n",
- _mesa_get_format_name(format));
- }
-
- brw_emit_buffer_surface_state(brw, surf_offset, bo,
- tObj->BufferOffset,
- isl_format,
- size,
- texel_size,
- 0);
-}
-
-/**
- * Set up a binding table entry for use by stream output logic (transform
- * feedback).
- *
- * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
- */
-void
-brw_update_sol_surface(struct brw_context *brw,
- struct gl_buffer_object *buffer_obj,
- uint32_t *out_offset, unsigned num_vector_components,
- unsigned stride_dwords, unsigned offset_dwords)
-{
- struct brw_buffer_object *intel_bo = brw_buffer_object(buffer_obj);
- uint32_t offset_bytes = 4 * offset_dwords;
- struct brw_bo *bo = brw_bufferobj_buffer(brw, intel_bo,
- offset_bytes,
- buffer_obj->Size - offset_bytes,
- true);
- uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
- uint32_t pitch_minus_1 = 4*stride_dwords - 1;
- size_t size_dwords = buffer_obj->Size / 4;
- uint32_t buffer_size_minus_1, width, height, depth, surface_format;
-
- /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
- * too big to map using a single binding table entry?
- */
- assert((size_dwords - offset_dwords) / stride_dwords
- <= BRW_MAX_NUM_BUFFER_ENTRIES);
-
- if (size_dwords > offset_dwords + num_vector_components) {
- /* There is room for at least 1 transform feedback output in the buffer.
- * Compute the number of additional transform feedback outputs the
- * buffer has room for.
- */
- buffer_size_minus_1 =
- (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
- } else {
- /* There isn't even room for a single transform feedback output in the
- * buffer. We can't configure the binding table entry to prevent output
- * entirely; we'll have to rely on the geometry shader to detect
- * overflow. But to minimize the damage in case of a bug, set up the
- * binding table entry to just allow a single output.
- */
- buffer_size_minus_1 = 0;
- }
- width = buffer_size_minus_1 & 0x7f;
- height = (buffer_size_minus_1 & 0xfff80) >> 7;
- depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
-
- switch (num_vector_components) {
- case 1:
- surface_format = ISL_FORMAT_R32_FLOAT;
- break;
- case 2:
- surface_format = ISL_FORMAT_R32G32_FLOAT;
- break;
- case 3:
- surface_format = ISL_FORMAT_R32G32B32_FLOAT;
- break;
- case 4:
- surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
- break;
- default:
- unreachable("Invalid vector size for transform feedback output");
- }
-
- surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
- BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
- surface_format << BRW_SURFACE_FORMAT_SHIFT |
- BRW_SURFACE_RC_READ_WRITE;
- surf[1] = brw_state_reloc(&brw->batch,
- *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
- surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
- height << BRW_SURFACE_HEIGHT_SHIFT);
- surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
- pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
- surf[4] = 0;
- surf[5] = 0;
-}
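/* Illustrative sketch, not driver code: how the (at most 27-bit) "entries
 * minus one" value computed above is split across the legacy SURFACE_STATE
 * width/height/depth fields.  The function name is made up for illustration.
 */
#include <assert.h>
#include <stdint.h>

static void
example_pack_sol_buffer_size(uint32_t buffer_size_minus_1)
{
   uint32_t width  = buffer_size_minus_1 & 0x7f;              /* bits  6:0  */
   uint32_t height = (buffer_size_minus_1 & 0xfff80) >> 7;    /* bits 19:7  */
   uint32_t depth  = (buffer_size_minus_1 & 0x7f00000) >> 20; /* bits 26:20 */

   /* Reassembling the three fields yields the original value, which is why
    * buffer_size_minus_1 must fit in 27 bits.
    */
   assert(((depth << 20) | (height << 7) | width) == buffer_size_minus_1);
}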
-
-/* Creates a new WM constant buffer reflecting the current fragment program's
- * constants, if needed by the fragment program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_wm_pull_constants(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->wm.base;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct brw_program *fp =
- (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];
-
- /* BRW_NEW_FS_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
- /* _NEW_PROGRAM_CONSTANTS */
- brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
- stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_wm_pull_constants = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA,
- },
- .emit = brw_upload_wm_pull_constants,
-};
-
-/**
- * Creates a null renderbuffer surface.
- *
- * This is used when the shader doesn't write to any color output. An FB
- * write to target 0 will still be emitted, because that's how the thread is
- * terminated (and computed depth is returned), so we need to have the
- * hardware discard the target 0 color output.
- */
-static void
-emit_null_surface_state(struct brw_context *brw,
- const struct gl_framebuffer *fb,
- uint32_t *out_offset)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t *surf = brw_state_batch(brw,
- brw->isl_dev.ss.size,
- brw->isl_dev.ss.align,
- out_offset);
-
- /* Use the fb dimensions or 1x1x1 */
- const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
- const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
- const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;
-
- if (devinfo->ver != 6 || samples <= 1) {
- isl_null_fill_state(&brw->isl_dev, surf,
- .size = isl_extent3d(width, height, 1));
- return;
- }
-
- /* On Gfx6, null render targets seem to cause GPU hangs when multisampling.
- * So work around this problem by rendering into a dummy color buffer.
- *
- * To decrease the amount of memory needed by the workaround buffer, we
- * set its pitch to 128 bytes (the width of a Y tile). This means that
- * the amount of memory needed for the workaround buffer is
- * (width_in_tiles + height_in_tiles - 1) tiles.
- *
- * Note that since the workaround buffer will be interpreted by the
- * hardware as an interleaved multisampled buffer, we need to compute
- * width_in_tiles and height_in_tiles by dividing the width and height
- * by 16 rather than the normal Y-tile size of 32.
- */
- unsigned width_in_tiles = ALIGN(width, 16) / 16;
- unsigned height_in_tiles = ALIGN(height, 16) / 16;
- unsigned pitch_minus_1 = 127;
- unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
- brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
- size_needed);
-
- surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
- ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
- surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
- brw->wm.multisampled_null_render_target_bo,
- 0, RELOC_WRITE);
-
- surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
- (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
-
- /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
- * Notes):
- *
- * If Surface Type is SURFTYPE_NULL, this field must be TRUE
- */
- surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
- pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
- surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
- surf[5] = 0;
-}
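/* Illustrative sketch, not driver code: the workaround-buffer size math
 * described in the comment above, with an arbitrarily chosen framebuffer
 * size.  The helper name is made up for illustration.
 */
static unsigned
example_null_rt_workaround_size(unsigned width, unsigned height)
{
   /* The buffer is treated as an interleaved multisampled surface, so width
    * and height are divided by 16 (not the normal 32-row Y-tile height),
    * and with a 128-byte pitch only
    * (width_in_tiles + height_in_tiles - 1) 4 KiB tiles are needed.
    */
   const unsigned width_in_tiles  = (width + 15) / 16;
   const unsigned height_in_tiles = (height + 15) / 16;
   return (width_in_tiles + height_in_tiles - 1) * 4096;
   /* e.g. 1920x1080: (120 + 68 - 1) * 4096 bytes, roughly 748 KiB, far less
    * than a full-size dummy render target would require.
    */
}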
-
-/**
- * Sets up a surface state structure to point at the given region.
- * While it is only used for the front/back buffer currently, it should be
- * usable for further buffers when doing ARB_draw_buffers support.
- */
-static uint32_t
-gfx4_update_renderbuffer_surface(struct brw_context *brw,
- struct gl_renderbuffer *rb,
- unsigned unit,
- uint32_t surf_index)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- struct brw_mipmap_tree *mt = irb->mt;
- uint32_t *surf;
- uint32_t tile_x, tile_y;
- enum isl_format format;
- uint32_t offset;
- /* _NEW_BUFFERS */
- mesa_format rb_format = _mesa_get_render_format(ctx, brw_rb_format(irb));
- /* BRW_NEW_FS_PROG_DATA */
-
- if (rb->TexImage && !devinfo->has_surface_tile_offset) {
- brw_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
-
- if (tile_x != 0 || tile_y != 0) {
- /* Original gfx4 hardware couldn't draw to a non-tile-aligned
- * destination in a miptree unless you actually set up your renderbuffer
- * as a miptree and used the fragile lod/array_index/etc. controls to
- * select the image. So, instead, we just make a new single-level
- * miptree and render into that.
- */
- brw_renderbuffer_move_to_temp(brw, irb, false);
- assert(irb->align_wa_mt);
- mt = irb->align_wa_mt;
- }
- }
-
- surf = brw_state_batch(brw, 6 * 4, 32, &offset);
-
- format = brw->mesa_to_isl_render_format[rb_format];
- if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
- _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
- __func__, _mesa_get_format_name(rb_format));
- }
-
- surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
- format << BRW_SURFACE_FORMAT_SHIFT);
-
- /* reloc */
- assert(mt->offset % mt->cpp == 0);
- surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
- mt->offset +
- brw_renderbuffer_get_tile_offsets(irb,
- &tile_x,
- &tile_y),
- RELOC_WRITE);
-
- surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
- (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
-
- surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
- (mt->surf.row_pitch_B - 1) << BRW_SURFACE_PITCH_SHIFT);
-
- surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);
-
- assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
- /* Note that the low bits of these fields are missing, so
- * there's the possibility of getting in trouble.
- */
- assert(tile_x % 4 == 0);
- assert(tile_y % 2 == 0);
- surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
- (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
- (mt->surf.image_alignment_el.height == 4 ?
- BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
-
- if (devinfo->ver < 6) {
- /* _NEW_COLOR */
- if (!ctx->Color.ColorLogicOpEnabled &&
- ctx->Color._AdvancedBlendMode == BLEND_NONE &&
- (ctx->Color.BlendEnabled & (1 << unit)))
- surf[0] |= BRW_SURFACE_BLEND_ENABLED;
-
- if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 0))
- surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
- if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 1))
- surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
- if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 2))
- surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
-
- /* As mentioned above, disable writes to the alpha component when the
- * renderbuffer is XRGB.
- */
- if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
- !GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 3)) {
- surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
- }
- }
-
- return offset;
-}
-
-static void
-update_renderbuffer_surfaces(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_BUFFERS | _NEW_COLOR */
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- /* Render targets always start at binding table index 0. */
- const unsigned rt_start = 0;
-
- uint32_t *surf_offsets = brw->wm.base.surf_offset;
-
- /* Update surfaces for drawing buffers */
- if (fb->_NumColorDrawBuffers >= 1) {
- for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
-
- if (brw_renderbuffer(rb)) {
- surf_offsets[rt_start + i] = devinfo->ver >= 6 ?
- gfx6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
- gfx4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
- } else {
- emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
- }
- }
- } else {
- emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
- }
-
- /* The PIPE_CONTROL command description says:
- *
- * "Whenever a Binding Table Index (BTI) used by a Render Taget Message
- * points to a different RENDER_SURFACE_STATE, SW must issue a Render
- * Target Cache Flush by enabling this bit. When render target flush
- * is set due to new association of BTI, PS Scoreboard Stall bit must
- * be set in this packet."
- */
- if (devinfo->ver >= 11) {
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
-const struct brw_tracked_state brw_renderbuffer_surfaces = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_COLOR,
- .brw = BRW_NEW_BATCH,
- },
- .emit = update_renderbuffer_surfaces,
-};
-
-const struct brw_tracked_state gfx6_renderbuffer_surfaces = {
- .dirty = {
- .mesa = _NEW_BUFFERS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE,
- },
- .emit = update_renderbuffer_surfaces,
-};
-
-static void
-update_renderbuffer_read_surfaces(struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- if (wm_prog_data->has_render_target_reads &&
- !ctx->Extensions.EXT_shader_framebuffer_fetch) {
- /* _NEW_BUFFERS */
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
- const struct brw_renderbuffer *irb = brw_renderbuffer(rb);
- const unsigned surf_index =
- wm_prog_data->binding_table.render_target_read_start + i;
- uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
-
- if (irb) {
- const enum isl_format format = brw->mesa_to_isl_render_format[
- _mesa_get_render_format(ctx, brw_rb_format(irb))];
- assert(isl_format_supports_sampling(&brw->screen->devinfo,
- format));
-
- /* Override the target of the texture if the render buffer is a
- * single slice of a 3D texture (since the minimum array element
- * field of the surface state structure is ignored by the sampler
- * unit for 3D textures on some hardware), or if the render buffer
- * is a 1D array (since shaders always provide the array index
- * coordinate at the Z component to avoid state-dependent
- * recompiles when changing the texture target of the
- * framebuffer).
- */
- const GLenum target =
- (irb->mt->target == GL_TEXTURE_3D &&
- irb->layer_count == 1) ? GL_TEXTURE_2D :
- irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
- irb->mt->target;
-
- const struct isl_view view = {
- .format = format,
- .base_level = irb->mt_level - irb->mt->first_level,
- .levels = 1,
- .base_array_layer = irb->mt_layer,
- .array_len = irb->layer_count,
- .swizzle = ISL_SWIZZLE_IDENTITY,
- .usage = ISL_SURF_USAGE_TEXTURE_BIT,
- };
-
- enum isl_aux_usage aux_usage =
- brw_miptree_texture_aux_usage(brw, irb->mt, format,
- brw->gfx9_astc5x5_wa_tex_mask);
- if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
- aux_usage = ISL_AUX_USAGE_NONE;
-
- brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
- surf_offset, surf_index,
- 0);
-
- } else {
- emit_null_surface_state(brw, fb, surf_offset);
- }
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
- }
-}
-
-const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
- .dirty = {
- .mesa = _NEW_BUFFERS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_FS_PROG_DATA,
- },
- .emit = update_renderbuffer_read_surfaces,
-};
-
-static bool
-is_depth_texture(struct brw_texture_object *iobj)
-{
- GLenum base_format = _mesa_get_format_base_format(iobj->_Format);
- return base_format == GL_DEPTH_COMPONENT ||
- (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling);
-}
-
-static void
-update_stage_texture_surfaces(struct brw_context *brw,
- const struct gl_program *prog,
- struct brw_stage_state *stage_state,
- bool for_gather, uint32_t plane)
-{
- if (!prog)
- return;
-
- struct gl_context *ctx = &brw->ctx;
-
- uint32_t *surf_offset = stage_state->surf_offset;
-
- /* BRW_NEW_*_PROG_DATA */
- if (for_gather)
- surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
- else
- surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
-
- unsigned num_samplers = BITSET_LAST_BIT(prog->info.textures_used);
- for (unsigned s = 0; s < num_samplers; s++) {
- surf_offset[s] = 0;
-
- if (BITSET_TEST(prog->info.textures_used, s)) {
- const unsigned unit = prog->SamplerUnits[s];
- const bool used_by_txf = BITSET_TEST(prog->info.textures_used_by_txf, s);
- struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
- struct brw_texture_object *iobj = brw_texture_object(obj);
-
- /* _NEW_TEXTURE */
- if (!obj)
- continue;
-
- if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) {
- /* A programming note for the sample_c message says:
- *
- * "The Surface Format of the associated surface must be
- * indicated as supporting shadow mapping as indicated in the
- * surface format table."
- *
- * Accessing non-depth textures via a sampler*Shadow type is
- * undefined. GLSL 4.50 page 162 says:
- *
- * "If a shadow texture call is made to a sampler that does not
- * represent a depth texture, then results are undefined."
- *
- * We give them a null surface (zeros) for undefined. We've seen
- * GPU hangs with color buffers and sample_c, so we try to avoid
- * those with this hack.
- */
- emit_null_surface_state(brw, NULL, surf_offset + s);
- } else {
- brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
- used_by_txf, plane);
- }
- }
- }
-}
-
-
-/**
- * Construct SURFACE_STATE objects for enabled textures.
- */
-static void
-brw_update_texture_surfaces(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];
-
- /* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
- struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];
-
- /* BRW_NEW_GEOMETRY_PROGRAM */
- struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];
-
- /* _NEW_TEXTURE */
- update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
- update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
- update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
- update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
- update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
-
- /* Emit an alternate set of surface state for gather. This
- * allows the surface format to be overridden for only the
- * gather4 messages. */
- if (devinfo->ver < 8) {
- if (vs && vs->info.uses_texture_gather)
- update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
- if (tcs && tcs->info.uses_texture_gather)
- update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
- if (tes && tes->info.uses_texture_gather)
- update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
- if (gs && gs->info.uses_texture_gather)
- update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
- if (fs && fs->info.uses_texture_gather)
- update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
- }
-
- if (fs) {
- update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
- update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
-const struct brw_tracked_state brw_texture_surfaces = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_TESS_PROGRAMS |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_TEXTURE_BUFFER |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_update_texture_surfaces,
-};
-
-static void
-brw_update_cs_texture_surfaces(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* BRW_NEW_COMPUTE_PROGRAM */
- struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];
-
- /* _NEW_TEXTURE */
- update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
-
- /* Emit an alternate set of surface state for gather. This
- * allows the surface format to be overridden for only the
- * gather4 messages.
- */
- if (devinfo->ver < 8) {
- if (cs && cs->info.uses_texture_gather)
- update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
-const struct brw_tracked_state brw_cs_texture_surfaces = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_COMPUTE_PROGRAM |
- BRW_NEW_AUX_STATE,
- },
- .emit = brw_update_cs_texture_surfaces,
-};
-
-static void
-upload_buffer_surface(struct brw_context *brw,
- struct gl_buffer_binding *binding,
- uint32_t *out_offset,
- enum isl_format format,
- unsigned reloc_flags)
-{
- if (!binding->BufferObject) {
- emit_null_surface_state(brw, NULL, out_offset);
- } else {
- ptrdiff_t size = binding->BufferObject->Size - binding->Offset;
- if (!binding->AutomaticSize)
- size = MIN2(size, binding->Size);
-
- if (size == 0) {
- emit_null_surface_state(brw, NULL, out_offset);
- return;
- }
-
- struct brw_buffer_object *iobj =
- brw_buffer_object(binding->BufferObject);
- struct brw_bo *bo =
- brw_bufferobj_buffer(brw, iobj, binding->Offset, size,
- (reloc_flags & RELOC_WRITE) != 0);
-
- brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset,
- format, size, 1, reloc_flags);
- }
-}
-
-void
-brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
- struct brw_stage_state *stage_state,
- struct brw_stage_prog_data *prog_data)
-{
- struct gl_context *ctx = &brw->ctx;
-
- if (!prog || (prog->info.num_ubos == 0 &&
- prog->info.num_ssbos == 0 &&
- prog->info.num_abos == 0))
- return;
-
- if (prog->info.num_ubos) {
- assert(prog_data->binding_table.ubo_start < BRW_MAX_SURFACES);
- uint32_t *ubo_surf_offsets =
- &stage_state->surf_offset[prog_data->binding_table.ubo_start];
-
- for (int i = 0; i < prog->info.num_ubos; i++) {
- struct gl_buffer_binding *binding =
- &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
- upload_buffer_surface(brw, binding, &ubo_surf_offsets[i],
- ISL_FORMAT_R32G32B32A32_FLOAT, 0);
- }
- }
-
- if (prog->info.num_ssbos || prog->info.num_abos) {
- assert(prog_data->binding_table.ssbo_start < BRW_MAX_SURFACES);
- uint32_t *ssbo_surf_offsets =
- &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
- uint32_t *abo_surf_offsets = ssbo_surf_offsets + prog->info.num_ssbos;
-
- for (int i = 0; i < prog->info.num_abos; i++) {
- struct gl_buffer_binding *binding =
- &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
- upload_buffer_surface(brw, binding, &abo_surf_offsets[i],
- ISL_FORMAT_RAW, RELOC_WRITE);
- }
-
- for (int i = 0; i < prog->info.num_ssbos; i++) {
- struct gl_buffer_binding *binding =
- &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
-
- upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i],
- ISL_FORMAT_RAW, RELOC_WRITE);
- }
- }
-
- stage_state->push_constants_dirty = true;
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
-static void
-brw_upload_wm_ubo_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* _NEW_PROGRAM */
- struct gl_program *prog = ctx->FragmentProgram._Current;
-
- /* BRW_NEW_FS_PROG_DATA */
- brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
-}
-
-const struct brw_tracked_state brw_wm_ubo_surfaces = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_UNIFORM_BUFFER,
- },
- .emit = brw_upload_wm_ubo_surfaces,
-};
-
-static void
-brw_upload_cs_ubo_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* _NEW_PROGRAM */
- struct gl_program *prog =
- ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
-
- /* BRW_NEW_CS_PROG_DATA */
- brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
-}
-
-const struct brw_tracked_state brw_cs_ubo_surfaces = {
- .dirty = {
- .mesa = _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_CS_PROG_DATA |
- BRW_NEW_UNIFORM_BUFFER,
- },
- .emit = brw_upload_cs_ubo_surfaces,
-};
-
-static void
-brw_upload_cs_image_surfaces(struct brw_context *brw)
-{
- /* _NEW_PROGRAM */
- const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];
-
- if (cp) {
- /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
- brw_upload_image_surfaces(brw, cp, &brw->cs.base,
- brw->cs.base.prog_data);
- }
-}
-
-const struct brw_tracked_state brw_cs_image_surfaces = {
- .dirty = {
- .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_CS_PROG_DATA |
- BRW_NEW_AUX_STATE |
- BRW_NEW_IMAGE_UNITS
- },
- .emit = brw_upload_cs_image_surfaces,
-};
-
-static uint32_t
-get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
- if (access == GL_WRITE_ONLY || access == GL_NONE) {
- return hw_format;
- } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
- /* Typed surface reads support a very limited subset of the shader
- * image formats. Translate it into the closest format the
- * hardware supports.
- */
- return isl_lower_storage_image_format(devinfo, hw_format);
- } else {
- /* The hardware doesn't actually support a typed format that we can use
- * so we have to fall back to untyped read/write messages.
- */
- return ISL_FORMAT_RAW;
- }
-}
-
-static void
-update_default_image_param(struct brw_context *brw,
- struct gl_image_unit *u,
- struct brw_image_param *param)
-{
- memset(param, 0, sizeof(*param));
- /* Set the swizzling shifts to all-ones to effectively disable swizzling --
- * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
- * detailed explanation of these parameters.
- */
- param->swizzling[0] = 0xff;
- param->swizzling[1] = 0xff;
-}
-
-static void
-update_buffer_image_param(struct brw_context *brw,
- struct gl_image_unit *u,
- struct brw_image_param *param)
-{
- const unsigned size = buffer_texture_range_size(brw, u->TexObj);
- update_default_image_param(brw, u, param);
-
- param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
- param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
-}
-
-static void
-update_image_surface(struct brw_context *brw,
- struct gl_image_unit *u,
- GLenum access,
- uint32_t *surf_offset,
- struct brw_image_param *param)
-{
- if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
- struct gl_texture_object *obj = u->TexObj;
- const unsigned format = get_image_format(brw, u->_ActualFormat, access);
- const bool written = (access != GL_READ_ONLY && access != GL_NONE);
-
- if (obj->Target == GL_TEXTURE_BUFFER) {
- const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
- _mesa_get_format_bytes(u->_ActualFormat));
- const unsigned buffer_size = buffer_texture_range_size(brw, obj);
- struct brw_bo *const bo = !obj->BufferObject ? NULL :
- brw_bufferobj_buffer(brw, brw_buffer_object(obj->BufferObject),
- obj->BufferOffset, buffer_size, written);
-
- brw_emit_buffer_surface_state(
- brw, surf_offset, bo, obj->BufferOffset,
- format, buffer_size, texel_size,
- written ? RELOC_WRITE : 0);
-
- update_buffer_image_param(brw, u, param);
-
- } else {
- struct brw_texture_object *intel_obj = brw_texture_object(obj);
- struct brw_mipmap_tree *mt = intel_obj->mt;
-
- unsigned base_layer, num_layers;
- if (u->Layered) {
- if (obj->Target == GL_TEXTURE_3D) {
- base_layer = 0;
- num_layers = minify(mt->surf.logical_level0_px.depth, u->Level);
- } else {
- assert(obj->Immutable || obj->Attrib.MinLayer == 0);
- base_layer = obj->Attrib.MinLayer;
- num_layers = obj->Immutable ?
- obj->Attrib.NumLayers :
- mt->surf.logical_level0_px.array_len;
- }
- } else {
- base_layer = obj->Attrib.MinLayer + u->_Layer;
- num_layers = 1;
- }
-
- struct isl_view view = {
- .format = format,
- .base_level = obj->Attrib.MinLevel + u->Level,
- .levels = 1,
- .base_array_layer = base_layer,
- .array_len = num_layers,
- .swizzle = ISL_SWIZZLE_IDENTITY,
- .usage = ISL_SURF_USAGE_STORAGE_BIT,
- };
-
- if (format == ISL_FORMAT_RAW) {
- brw_emit_buffer_surface_state(
- brw, surf_offset, mt->bo, mt->offset,
- format, mt->bo->size - mt->offset, 1 /* pitch */,
- written ? RELOC_WRITE : 0);
-
- } else {
- const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
- assert(!brw_miptree_has_color_unresolved(mt,
- view.base_level, 1,
- view.base_array_layer,
- view.array_len));
- brw_emit_surface_state(brw, mt, mt->target, view,
- ISL_AUX_USAGE_NONE,
- surf_offset, surf_index,
- written ? RELOC_WRITE : 0);
- }
-
- isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
- }
-
- } else {
- emit_null_surface_state(brw, NULL, surf_offset);
- update_default_image_param(brw, u, param);
- }
-}
-
-void
-brw_upload_image_surfaces(struct brw_context *brw,
- const struct gl_program *prog,
- struct brw_stage_state *stage_state,
- struct brw_stage_prog_data *prog_data)
-{
- assert(prog);
- struct gl_context *ctx = &brw->ctx;
-
- if (prog->info.num_images) {
- for (unsigned i = 0; i < prog->info.num_images; i++) {
- struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
- const unsigned surf_idx = prog_data->binding_table.image_start + i;
-
- update_image_surface(brw, u, prog->sh.ImageAccess[i],
- &stage_state->surf_offset[surf_idx],
- &stage_state->image_param[i]);
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
- /* This may have changed the image metadata that depends on the context
- * image unit state and is passed to the program as uniforms, so make
- * sure that push and pull constants are re-uploaded.
- */
- brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
- }
-}
-
-static void
-brw_upload_wm_image_surfaces(struct brw_context *brw)
-{
- /* BRW_NEW_FRAGMENT_PROGRAM */
- const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];
-
- if (wm) {
- /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
- brw_upload_image_surfaces(brw, wm, &brw->wm.base,
- brw->wm.base.prog_data);
- }
-}
-
-const struct brw_tracked_state brw_wm_image_surfaces = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_AUX_STATE |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_IMAGE_UNITS
- },
- .emit = brw_upload_wm_image_surfaces,
-};
-
-static void
-brw_upload_cs_work_groups_surface(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* _NEW_PROGRAM */
- struct gl_program *prog =
- ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
- /* BRW_NEW_CS_PROG_DATA */
- const struct brw_cs_prog_data *cs_prog_data =
- brw_cs_prog_data(brw->cs.base.prog_data);
-
- if (prog && cs_prog_data->uses_num_work_groups) {
- const unsigned surf_idx =
- cs_prog_data->binding_table.work_groups_start;
- uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
- struct brw_bo *bo;
- uint32_t bo_offset;
-
- if (brw->compute.num_work_groups_bo == NULL) {
- bo = NULL;
- brw_upload_data(&brw->upload,
- (void *)brw->compute.num_work_groups,
- 3 * sizeof(GLuint),
- sizeof(GLuint),
- &bo,
- &bo_offset);
- } else {
- bo = brw->compute.num_work_groups_bo;
- bo_offset = brw->compute.num_work_groups_offset;
- }
-
- brw_emit_buffer_surface_state(brw, surf_offset,
- bo, bo_offset,
- ISL_FORMAT_RAW,
- 3 * sizeof(GLuint), 1,
- RELOC_WRITE);
-
- /* The state buffer now holds a reference to our upload, drop ours. */
- if (bo != brw->compute.num_work_groups_bo)
- brw_bo_unreference(bo);
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
- }
-}
-
-const struct brw_tracked_state brw_cs_work_groups_surface = {
- .dirty = {
- .brw = BRW_NEW_CS_PROG_DATA |
- BRW_NEW_CS_WORK_GROUPS
- },
- .emit = brw_upload_cs_work_groups_surface,
-};
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "brw_batch.h"
-#include "brw_mipmap_tree.h"
-#include "brw_fbo.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-
-#include "blorp/blorp_genX_exec.h"
-
-#if GFX_VER <= 5
-#include "gfx4_blorp_exec.h"
-#endif
-
-#include "brw_blorp.h"
-
-static void blorp_measure_start(struct blorp_batch *batch,
- const struct blorp_params *params) { }
-
-static void *
-blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- brw_batch_begin(brw, n);
- uint32_t *map = brw->batch.map_next;
- brw->batch.map_next += n;
- brw_batch_advance(brw);
- return map;
-}
-
-static uint64_t
-blorp_emit_reloc(struct blorp_batch *batch,
- void *location, struct blorp_address address, uint32_t delta)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
- uint32_t offset;
-
- if (GFX_VER < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) {
- offset = (char *)location - (char *)brw->batch.state.map;
- return brw_state_reloc(&brw->batch, offset,
- address.buffer, address.offset + delta,
- address.reloc_flags);
- }
-
- assert(!brw_ptr_in_state_buffer(&brw->batch, location));
-
- offset = (char *)location - (char *)brw->batch.batch.map;
- return brw_batch_reloc(&brw->batch, offset,
- address.buffer, address.offset + delta,
- address.reloc_flags);
-}
-
-static void
-blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
- struct blorp_address address, uint32_t delta)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
- struct brw_bo *bo = address.buffer;
-
- uint64_t reloc_val =
- brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta,
- address.reloc_flags);
-
- void *reloc_ptr = (void *)brw->batch.state.map + ss_offset;
-#if GFX_VER >= 8
- *(uint64_t *)reloc_ptr = reloc_val;
-#else
- *(uint32_t *)reloc_ptr = reloc_val;
-#endif
-}
-
-static uint64_t
-blorp_get_surface_address(UNUSED struct blorp_batch *blorp_batch,
- UNUSED struct blorp_address address)
-{
- /* We'll let blorp_surface_reloc write the address. */
- return 0ull;
-}
-
-#if GFX_VER >= 7 && GFX_VER < 10
-static struct blorp_address
-blorp_get_surface_base_address(struct blorp_batch *batch)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
- return (struct blorp_address) {
- .buffer = brw->batch.state.bo,
- .offset = 0,
- };
-}
-#endif
-
-static void *
-blorp_alloc_dynamic_state(struct blorp_batch *batch,
- uint32_t size,
- uint32_t alignment,
- uint32_t *offset)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- return brw_state_batch(brw, size, alignment, offset);
-}
-
-UNUSED static void *
-blorp_alloc_general_state(struct blorp_batch *blorp_batch,
- uint32_t size,
- uint32_t alignment,
- uint32_t *offset)
-{
- /* Use dynamic state range for general state on i965. */
- return blorp_alloc_dynamic_state(blorp_batch, size, alignment, offset);
-}
-
-static void
-blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
- unsigned state_size, unsigned state_alignment,
- uint32_t *bt_offset, uint32_t *surface_offsets,
- void **surface_maps)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- uint32_t *bt_map = brw_state_batch(brw,
- num_entries * sizeof(uint32_t), 32,
- bt_offset);
-
- for (unsigned i = 0; i < num_entries; i++) {
- surface_maps[i] = brw_state_batch(brw,
- state_size, state_alignment,
- &(surface_offsets)[i]);
- bt_map[i] = surface_offsets[i];
- }
-}
-
-static void *
-blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
- struct blorp_address *addr)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS:
- *
- * "The VF cache needs to be invalidated before binding and then using
- * Vertex Buffers that overlap with any previously bound Vertex Buffer
- * (at a 64B granularity) since the last invalidation. A VF cache
- * invalidate is performed by setting the "VF Cache Invalidation Enable"
- * bit in PIPE_CONTROL."
- *
- * This restriction first appears in the Skylake PRM but the internal docs
- * also list it as being an issue on Broadwell. In order to avoid this
- * problem, we align all vertex buffer allocations to 64 bytes.
- */
- uint32_t offset;
- void *data = brw_state_batch(brw, size, 64, &offset);
-
- *addr = (struct blorp_address) {
- .buffer = brw->batch.state.bo,
- .offset = offset,
-
- /* The VF cache designers apparently cut corners, and made the cache
- * only consider the bottom 32 bits of memory addresses. If you happen
- * to have two vertex buffers which get placed exactly 4 GiB apart and
- * use them in back-to-back draw calls, you can get collisions. To work
- * around this problem, we restrict vertex buffers to the low 32 bits of
- * the address space.
- */
- .reloc_flags = RELOC_32BIT,
-
- .mocs = brw_mocs(&brw->isl_dev, brw->batch.state.bo),
- };
-
- return data;
-}
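/* Illustrative sketch, not driver code: why the low-32-bit restriction
 * described above matters.  Two addresses exactly 4 GiB apart alias in a
 * cache that only considers the bottom 32 bits; the helper name is made up
 * for illustration.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
example_vf_cache_aliases(uint64_t addr_a, uint64_t addr_b)
{
   /* Distinct addresses whose low 32 bits match would collide in the cache. */
   return addr_a != addr_b && (uint32_t)addr_a == (uint32_t)addr_b;
}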
-
-/**
- * See vf_invalidate_for_vb_48b_transitions in genX_state_upload.c.
- */
-static void
-blorp_vf_invalidate_for_vb_48b_transitions(UNUSED struct blorp_batch *batch,
- UNUSED const struct blorp_address *addrs,
- UNUSED uint32_t *sizes,
- UNUSED unsigned num_vbs)
-{
-#if GFX_VER >= 8 && GFX_VER < 11
- struct brw_context *brw = batch->driver_batch;
- bool need_invalidate = false;
-
- for (unsigned i = 0; i < num_vbs; i++) {
- struct brw_bo *bo = addrs[i].buffer;
- uint16_t high_bits =
- bo && (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32u : 0;
-
- if (high_bits != brw->vb.last_bo_high_bits[i]) {
- need_invalidate = true;
- brw->vb.last_bo_high_bits[i] = high_bits;
- }
- }
-
- if (need_invalidate) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL);
- }
-#endif
-}
-
-UNUSED static struct blorp_address
-blorp_get_workaround_address(struct blorp_batch *batch)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- return (struct blorp_address) {
- .buffer = brw->workaround_bo,
- .offset = brw->workaround_bo_offset,
- };
-}
-
-static void
-blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start,
- UNUSED size_t size)
-{
- /* All allocated states come from the batch which we will flush before we
- * submit it. There's nothing for us to do here.
- */
-}
-
-#if GFX_VER >= 7
-static const struct intel_l3_config *
-blorp_get_l3_config(struct blorp_batch *batch)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- return brw->l3.config;
-}
-#else /* GFX_VER < 7 */
-static void
-blorp_emit_urb_config(struct blorp_batch *batch,
- unsigned vs_entry_size,
- UNUSED unsigned sf_entry_size)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
-#if GFX_VER == 6
- gfx6_upload_urb(brw, vs_entry_size, false, 0);
-#else
- /* We calculate it now and emit later. */
- brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size);
-#endif
-}
-#endif
-
-void
-genX(blorp_exec)(struct blorp_batch *batch,
- const struct blorp_params *params)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
- struct gl_context *ctx = &brw->ctx;
- bool check_aperture_failed_once = false;
-
-#if GFX_VER >= 11
- /* The PIPE_CONTROL command description says:
- *
- * "Whenever a Binding Table Index (BTI) used by a Render Taget Message
- * points to a different RENDER_SURFACE_STATE, SW must issue a Render
- * Target Cache Flush by enabling this bit. When render target flush
- * is set due to new association of BTI, PS Scoreboard Stall bit must
- * be set in this packet."
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
-#endif
-
- /* Flush the sampler and render caches. We definitely need to flush the
- * sampler cache so that we get updated contents from the render cache for
- * the glBlitFramebuffer() source. Also, we are sometimes warned in the
- * docs to flush the cache between reinterpretations of the same surface
- * data with different formats, which blorp does for stencil and depth
- * data.
- */
- if (params->src.enabled)
- brw_cache_flush_for_read(brw, params->src.addr.buffer);
- if (params->dst.enabled) {
- brw_cache_flush_for_render(brw, params->dst.addr.buffer,
- params->dst.view.format,
- params->dst.aux_usage);
- }
- if (params->depth.enabled)
- brw_cache_flush_for_depth(brw, params->depth.addr.buffer);
- if (params->stencil.enabled)
- brw_cache_flush_for_depth(brw, params->stencil.addr.buffer);
-
- brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
- brw_emit_l3_state(brw);
-
-retry:
- brw_batch_require_space(brw, 1400);
- brw_require_statebuffer_space(brw, 600);
- brw_batch_save_state(brw);
- check_aperture_failed_once |= brw_batch_saved_state_is_empty(brw);
- brw->batch.no_wrap = true;
-
-#if GFX_VER == 6
- /* Emit workaround flushes when we switch from drawing to blorping. */
- brw_emit_post_sync_nonzero_flush(brw);
-#endif
-
- brw->vtbl.emit_state_base_address(brw);
-
-#if GFX_VER >= 8
- gfx7_l3_state.emit(brw);
-#endif
-
-#if GFX_VER >= 6
- brw_emit_depth_stall_flushes(brw);
-#endif
-
-#if GFX_VER == 8
- gfx8_write_pma_stall_bits(brw, 0);
-#endif
-
- const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
- if (brw->current_hash_scale != scale) {
- brw_emit_hashing_mode(brw, params->x1 - params->x0,
- params->y1 - params->y0, scale);
- }
-
- blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
- rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
- rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
- }
-
- blorp_exec(batch, params);
-
- brw->batch.no_wrap = false;
-
- /* Check if the blorp op we just did would make our batch likely to fail to
- * map all the BOs into the GPU at batch exec time later. If so, flush the
- * batch and try again with nothing else in the batch.
- */
- if (!brw_batch_has_aperture_space(brw, 0)) {
- if (!check_aperture_failed_once) {
- check_aperture_failed_once = true;
- brw_batch_reset_to_saved(brw);
- brw_batch_flush(brw);
- goto retry;
- } else {
- int ret = brw_batch_flush(brw);
- WARN_ONCE(ret == -ENOSPC,
- "i965: blorp emit exceeded available aperture space\n");
- }
- }
-
- if (unlikely(brw->always_flush_batch))
- brw_batch_flush(brw);
-
- /* We've smashed all state compared to what the normal 3D pipeline
- * rendering tracks for GL.
- */
- brw->ctx.NewDriverState |= BRW_NEW_BLORP;
- brw->no_depth_or_stencil = !params->depth.enabled &&
- !params->stencil.enabled;
- brw->ib.index_size = -1;
- brw->urb.vsize = 0;
- brw->urb.gs_present = false;
- brw->urb.gsize = 0;
- brw->urb.tess_present = false;
- brw->urb.hsize = 0;
- brw->urb.dsize = 0;
-
- if (params->dst.enabled) {
- brw_render_cache_add_bo(brw, params->dst.addr.buffer,
- params->dst.view.format,
- params->dst.aux_usage);
- }
- if (params->depth.enabled)
- brw_depth_cache_add_bo(brw, params->depth.addr.buffer);
- if (params->stencil.enabled)
- brw_depth_cache_add_bo(brw, params->stencil.addr.buffer);
-}
+++ /dev/null
-/*
- * Copyright © 2018 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef GENX_BOILERPLATE_H
-#define GENX_BOILERPLATE_H
-
-#include <assert.h>
-
-#include "genxml/gen_macros.h"
-
-#include "brw_context.h"
-#include "brw_batch.h"
-
-UNUSED static void *
-emit_dwords(struct brw_context *brw, unsigned n)
-{
- brw_batch_begin(brw, n);
- uint32_t *map = brw->batch.map_next;
- brw->batch.map_next += n;
- brw_batch_advance(brw);
- return map;
-}
-
-struct brw_address {
- struct brw_bo *bo;
- unsigned reloc_flags;
- uint32_t offset;
-};
-
-#define __gen_address_type struct brw_address
-#define __gen_user_data struct brw_context
-
-static uint64_t
-__gen_combine_address(struct brw_context *brw, void *location,
- struct brw_address address, uint32_t delta)
-{
- struct brw_batch *batch = &brw->batch;
- uint32_t offset;
-
- if (address.bo == NULL) {
- return address.offset + delta;
- } else {
- if (GFX_VER < 6 && brw_ptr_in_state_buffer(batch, location)) {
- offset = (char *) location - (char *) brw->batch.state.map;
- return brw_state_reloc(batch, offset, address.bo,
- address.offset + delta,
- address.reloc_flags);
- }
-
- assert(!brw_ptr_in_state_buffer(batch, location));
-
- offset = (char *) location - (char *) brw->batch.batch.map;
- return brw_batch_reloc(batch, offset, address.bo,
- address.offset + delta,
- address.reloc_flags);
- }
-}
-
-UNUSED static struct brw_address
-rw_bo(struct brw_bo *bo, uint32_t offset)
-{
- return (struct brw_address) {
- .bo = bo,
- .offset = offset,
- .reloc_flags = RELOC_WRITE,
- };
-}
-
-UNUSED static struct brw_address
-ro_bo(struct brw_bo *bo, uint32_t offset)
-{
- return (struct brw_address) {
- .bo = bo,
- .offset = offset,
- };
-}
-
-UNUSED static struct brw_address
-rw_32_bo(struct brw_bo *bo, uint32_t offset)
-{
- return (struct brw_address) {
- .bo = bo,
- .offset = offset,
- .reloc_flags = RELOC_WRITE | RELOC_32BIT,
- };
-}
-
-UNUSED static struct brw_address
-ro_32_bo(struct brw_bo *bo, uint32_t offset)
-{
- return (struct brw_address) {
- .bo = bo,
- .offset = offset,
- .reloc_flags = RELOC_32BIT,
- };
-}
-
-UNUSED static struct brw_address
-ggtt_bo(struct brw_bo *bo, uint32_t offset)
-{
- return (struct brw_address) {
- .bo = bo,
- .offset = offset,
- .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT,
- };
-}
-
-#include "genxml/genX_pack.h"
-
-#define _brw_cmd_length(cmd) cmd ## _length
-#define _brw_cmd_length_bias(cmd) cmd ## _length_bias
-#define _brw_cmd_header(cmd) cmd ## _header
-#define _brw_cmd_pack(cmd) cmd ## _pack
-
-#define brw_batch_emit(brw, cmd, name) \
- for (struct cmd name = { _brw_cmd_header(cmd) }, \
- *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \
- __builtin_expect(_dst != NULL, 1); \
- _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
- _dst = NULL)
-
-#define brw_batch_emitn(brw, cmd, n, ...) ({ \
- uint32_t *_dw = emit_dwords(brw, n); \
- struct cmd template = { \
- _brw_cmd_header(cmd), \
- .DWordLength = n - _brw_cmd_length_bias(cmd), \
- __VA_ARGS__ \
- }; \
- _brw_cmd_pack(cmd)(brw, _dw, &template); \
- _dw + 1; /* Array starts at dw[1] */ \
- })
-
-#define brw_state_emit(brw, cmd, align, offset, name) \
- for (struct cmd name = {}, \
- *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \
- align, offset); \
- __builtin_expect(_dst != NULL, 1); \
- _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \
- _dst = NULL)
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "genX_boilerplate.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-
-static unsigned
-flags_to_post_sync_op(uint32_t flags)
-{
- if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
- return WriteImmediateData;
-
- if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
- return WritePSDepthCount;
-
- if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
- return WriteTimestamp;
-
- return 0;
-}
-
-/**
- * Do the given flags have a Post Sync or LRI Post Sync operation?
- */
-static enum pipe_control_flags
-get_post_sync_flags(enum pipe_control_flags flags)
-{
- flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP |
- PIPE_CONTROL_LRI_POST_SYNC_OP;
-
- /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
- * "LRI Post Sync Operation". So more than one bit set would be illegal.
- */
- assert(util_bitcount(flags) <= 1);
-
- return flags;
-}
-
-#define IS_COMPUTE_PIPELINE(brw) \
- (GFX_VER >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE)
-
-/* Closed interval - GFX_VER \in [x, y] */
-#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= x && GFX_VER <= y)
-#define IS_GFX_VERx10_BETWEEN(x, y) \
- (GFX_VERx10 >= x && GFX_VERx10 <= y)
-
-/**
- * Emit a series of PIPE_CONTROL commands, taking into account any
- * workarounds necessary to actually accomplish the caller's request.
- *
- * Unless otherwise noted, spec quotations in this function come from:
- *
- * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
- * Restrictions for PIPE_CONTROL.
- *
- * You should not use this function directly. Use the helpers in
- * brw_pipe_control.c instead, which may split the pipe control further.
- */
-void
-genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
- struct brw_bo *bo, uint32_t offset, uint64_t imm)
-{
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
- enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
- enum pipe_control_flags non_lri_post_sync_flags =
- post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
-
- /* Recursive PIPE_CONTROL workarounds --------------------------------
- * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
- *
- * We do these first because we want to look at the original operation,
- * rather than any workarounds we set.
- */
- if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
- /* Hardware workaround: SNB B-Spec says:
- *
- * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
- * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
- * required."
- */
- brw_emit_post_sync_nonzero_flush(brw);
- }
-
- if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
- /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
- * lists several workarounds:
- *
- * "Project: SKL, KBL, BXT
- *
- * If the VF Cache Invalidation Enable is set to a 1 in a
- * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
- * sets to 0, with the VF Cache Invalidation Enable set to 0
- * needs to be sent prior to the PIPE_CONTROL with VF Cache
- * Invalidation Enable set to a 1."
- */
- genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0);
- }
-
- if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) {
- /* Project: SKL / Argument: LRI Post Sync Operation [23]
- *
- * "PIPECONTROL command with “Command Streamer Stall Enable” must be
- * programmed prior to programming a PIPECONTROL command with "LRI
- * Post Sync Operation" in GPGPU mode of operation (i.e when
- * PIPELINE_SELECT command is set to GPGPU mode of operation)."
- *
- * The same text exists a few rows below for Post Sync Op.
- */
- genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0);
- }
-
- /* "Flush Types" workarounds ---------------------------------------------
- * We do these now because they may add post-sync operations or CS stalls.
- */
-
- if (IS_GFX_VER_BETWEEN(8, 10) &&
- (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
- /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
- *
- * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
- * 'Write PS Depth Count' or 'Write Timestamp'."
- */
- if (!bo) {
- flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
- post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
- non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
- bo = brw->workaround_bo;
- offset = brw->workaround_bo_offset;
- }
- }
-
- if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
- /* Project: PRE-HSW / Argument: Depth Stall
- *
- * "The following bits must be clear:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)"
- */
- assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
- }
-
- if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {
- /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
- *
- * "This bit must be DISABLED for operations other than writing
- * PS_DEPTH_COUNT."
- *
- * This seems like nonsense. An Ivybridge workaround requires us to
- * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
- * operation. Gfx8+ requires us to emit depth stalls and depth cache
- * flushes together. So, it's hard to imagine this means anything other
- * than "we originally intended this to be used for PS_DEPTH_COUNT".
- *
- * We ignore the supposed restriction and do nothing.
- */
- }
-
- if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {
- /* Project: PRE-HSW / Argument: Depth Cache Flush
- *
- * "Depth Stall must be clear ([13] of DW1)."
- */
- assert(!(flags & PIPE_CONTROL_DEPTH_STALL));
- }
-
- if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
- /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
- *
- * "This bit must be DISABLED for End-of-pipe (Read) fences,
- * PS_DEPTH_COUNT or TIMESTAMP queries."
- *
- * TODO: Implement end-of-pipe checking.
- */
- assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP)));
- }
-
- if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
- /* From the PIPE_CONTROL instruction table, bit 1:
- *
- * "This bit is ignored if Depth Stall Enable is set.
- * Further, the render cache is not flushed even if Write Cache
- * Flush Enable bit is set."
- *
- * We assert that the caller doesn't do this combination, to try and
- * prevent mistakes. It shouldn't hurt the GPU, though.
- *
- * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard"
- * and "Render Target Flush" combo is explicitly required for BTI
- * update workarounds.
- */
- assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_RENDER_TARGET_FLUSH)));
- }
-
- /* PIPE_CONTROL page workarounds ------------------------------------- */
-
- if (IS_GFX_VER_BETWEEN(7, 8) &&
- (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
- /* From the PIPE_CONTROL page itself:
- *
- * "IVB, HSW, BDW
- * Restriction: Pipe_control with CS-stall bit set must be issued
- * before a pipe-control command that has the State Cache
- * Invalidate bit set."
- */
- flags |= PIPE_CONTROL_CS_STALL;
- }
-
- if (GFX_VERx10 == 75) {
- /* From the PIPE_CONTROL page itself:
- *
- * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
- * Prior to programming a PIPECONTROL command with any of the RO
- * cache invalidation bit set, program a PIPECONTROL flush command
- * with “CS stall” bit and “HDC Flush” bit set."
- *
- * TODO: Actually implement this. What's an HDC Flush?
- */
- }
-
- if (flags & PIPE_CONTROL_FLUSH_LLC) {
- /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
- *
- * "Project: ALL
- * SW must always program Post-Sync Operation to "Write Immediate
- * Data" when Flush LLC is set."
- *
- * For now, we just require the caller to do it.
- */
- assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
- }
-
- /* "Post-Sync Operation" workarounds -------------------------------- */
-
- /* Project: All / Argument: Global Snapshot Count Reset [19]
- *
- * "This bit must not be exercised on any product.
- * Requires stall bit ([20] of DW1) set."
- *
- * We don't use this, so we just assert that it isn't used. The
- * PIPE_CONTROL instruction page indicates that they intended this
- * as a debug feature and don't think it is useful in production,
- * but it may actually be usable, should we ever want to.
- */
- assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
-
- if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
- PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
- /* Project: All / Arguments:
- *
- * - Generic Media State Clear [16]
- * - Indirect State Pointers Disable [16]
- *
- * "Requires stall bit ([20] of DW1) set."
- *
- * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
- * State Clear) says:
- *
- * "PIPECONTROL command with “Command Streamer Stall Enable” must be
- * programmed prior to programming a PIPECONTROL command with "Media
- * State Clear" set in GPGPU mode of operation"
- *
- * This is a subset of the earlier rule, so there's nothing to do.
- */
- flags |= PIPE_CONTROL_CS_STALL;
- }
-
- if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
- /* Project: All / Argument: Store Data Index
- *
- * "Post-Sync Operation ([15:14] of DW1) must be set to something other
- * than '0'."
- *
- * For now, we just assert that the caller does this. We might want to
- * automatically add a write to the workaround BO...
- */
- assert(non_lri_post_sync_flags != 0);
- }
-
- if (flags & PIPE_CONTROL_SYNC_GFDT) {
- /* Project: All / Argument: Sync GFDT
- *
- * "Post-Sync Operation ([15:14] of DW1) must be set to something other
- * than '0' or 0x2520[13] must be set."
- *
- * For now, we just assert that the caller does this.
- */
- assert(non_lri_post_sync_flags != 0);
- }
-
- if (IS_GFX_VERx10_BETWEEN(60, 75) &&
- (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
- /* Project: SNB, IVB, HSW / Argument: TLB inv
- *
- * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
- * must be set to something other than '0'."
- *
- * For now, we just assert that the caller does this.
- */
- assert(non_lri_post_sync_flags != 0);
- }
-
- if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {
- /* Project: IVB+ / Argument: TLB inv
- *
- * "Requires stall bit ([20] of DW1) set."
- *
- * Also, from the PIPE_CONTROL instruction table:
- *
- * "Project: SKL+
- * Post Sync Operation or CS stall must be set to ensure a TLB
- * invalidation occurs. Otherwise no cycle will occur to the TLB
- * cache to invalidate."
- *
- * This is not a subset of the earlier rule, so there's nothing to do.
- */
- flags |= PIPE_CONTROL_CS_STALL;
- }
-
- if (GFX_VER == 9 && devinfo->gt == 4) {
- /* TODO: The big Skylake GT4 post sync op workaround */
- }
-
- /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
-
- if (IS_COMPUTE_PIPELINE(brw)) {
- if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
- /* Project: SKL+ / Argument: Tex Invalidate
- * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
- */
- flags |= PIPE_CONTROL_CS_STALL;
- }
-
- if (GFX_VER == 8 && (post_sync_flags ||
- (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
- /* Project: BDW / Arguments:
- *
- * - LRI Post Sync Operation [23]
- * - Post Sync Op [15:14]
- * - Notify En [8]
- * - Depth Stall [13]
- * - Render Target Cache Flush [12]
- * - Depth Cache Flush [0]
- * - DC Flush Enable [5]
- *
- * "Requires stall bit ([20] of DW) set for all GPGPU and Media
- * Workloads."
- *
- * (The docs have separate table rows for each bit, with essentially
- * the same workaround text. We've combined them here.)
- */
- flags |= PIPE_CONTROL_CS_STALL;
-
- /* Also, from the PIPE_CONTROL instruction table, bit 20:
- *
- * "Project: BDW
- * This bit must be always set when PIPE_CONTROL command is
- * programmed by GPGPU and MEDIA workloads, except for the cases
- * when only Read Only Cache Invalidation bits are set (State
- * Cache Invalidation Enable, Instruction cache Invalidation
- * Enable, Texture Cache Invalidation Enable, Constant Cache
- * Invalidation Enable). This is to WA FFDOP CG issue, this WA
- * need not implemented when FF_DOP_CG is disable via "Fixed
- * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
- *
- * It sounds like we could avoid CS stalls in some cases, but we
- * don't currently bother. This list isn't exactly the list above,
- * either...
- */
- }
- }
-
- /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
- *
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
- *
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch. We currently naively count every 4, and
- * don't skip the ones with only read-cache-invalidate bits set. This
- * may or may not be a problem...
- */
- if (GFX_VERx10 == 70) {
- if (flags & PIPE_CONTROL_CS_STALL) {
- /* If we're doing a CS stall, reset the counter and carry on. */
- brw->pipe_controls_since_last_cs_stall = 0;
- }
-
- /* If this is the fourth pipe control without a CS stall, do one now. */
- if (++brw->pipe_controls_since_last_cs_stall == 4) {
- brw->pipe_controls_since_last_cs_stall = 0;
- flags |= PIPE_CONTROL_CS_STALL;
- }
- }
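
Editor's note (illustrative sketch, not part of the removed file): the WaCsStallAtEveryFourthPipecontrol bookkeeping above reduces to a counter that resets on every CS stall and forces one on the fourth PIPE_CONTROL emitted without it. A standalone version, with a hypothetical flag bit and counter standing in for PIPE_CONTROL_CS_STALL and brw->pipe_controls_since_last_cs_stall, could look like this:

   #include <stdint.h>

   #define WA_CS_STALL (1u << 0)            /* hypothetical stand-in flag bit */

   static uint32_t
   wa_every_fourth_pipe_control(uint32_t flags, unsigned *count)
   {
      if (flags & WA_CS_STALL)
         *count = 0;                         /* a CS stall resets the counter */

      if (++*count == 4) {                   /* 4th PIPE_CONTROL without one */
         *count = 0;
         flags |= WA_CS_STALL;               /* force a CS stall now */
      }
      return flags;
   }
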
-
- /* "Stall" workarounds ----------------------------------------------
- * These have to come after the earlier ones because we may have added
- * some additional CS stalls above.
- */
-
- if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
- /* Project: PRE-SKL, VLV, CHV
- *
- * "[All Stepping][All SKUs]:
- *
- * One of the following must also be set:
- *
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- * - DC Flush Enable ([5] of DW1)"
- *
- * If we don't already have one of those bits set, we choose to add
- * "Stall at Pixel Scoreboard". Some of the other bits require a
- * CS stall as a workaround (see above), which would send us into
- * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard"
- * appears to be safe, so we choose that.
- */
- const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP |
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DATA_CACHE_FLUSH;
- if (!(flags & wa_bits))
- flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
- }
-
- /* Emit --------------------------------------------------------------- */
-
- brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
- #if GFX_VER >= 9
- pc.FlushLLC = 0;
- #endif
- #if GFX_VER >= 7
- pc.LRIPostSyncOperation = NoLRIOperation;
- pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
- pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
- #endif
- #if GFX_VER >= 6
- pc.StoreDataIndex = 0;
- pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
- pc.GlobalSnapshotCountReset =
- flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
- pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
- pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
- pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
- pc.RenderTargetCacheFlushEnable =
- flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
- pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- pc.StateCacheInvalidationEnable =
- flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
- pc.ConstantCacheInvalidationEnable =
- flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
- #else
- pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
- #endif
- pc.PostSyncOperation = flags_to_post_sync_op(flags);
- pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
- pc.InstructionCacheInvalidateEnable =
- flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
- pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
- #if GFX_VERx10 >= 45
- pc.IndirectStatePointersDisable =
- flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
- #endif
- #if GFX_VER >= 6
- pc.TextureCacheInvalidationEnable =
- flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- #elif GFX_VER == 5 || GFX_VERx10 == 45
- pc.TextureCacheFlushEnable =
- flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- #endif
- pc.Address = ggtt_bo(bo, offset);
- if (GFX_VER < 7 && bo)
- pc.DestinationAddressType = DAT_GGTT;
- pc.ImmediateData = imm;
- }
-}
+++ /dev/null
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "main/samplerobj.h"
-
-#include "dev/intel_device_info.h"
-#include "common/intel_sample_positions.h"
-#include "genxml/gen_macros.h"
-#include "common/intel_guardband.h"
-
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/enums.h"
-#include "main/macros.h"
-#include "main/state.h"
-
-#include "genX_boilerplate.h"
-
-#include "brw_context.h"
-#include "brw_cs.h"
-#include "brw_draw.h"
-#include "brw_multisample_state.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_util.h"
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_fbo.h"
-
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/glformats.h"
-#include "main/shaderapi.h"
-#include "main/stencil.h"
-#include "main/transformfeedback.h"
-#include "main/varray.h"
-#include "main/viewport.h"
-#include "util/half_float.h"
-
-#if GFX_VER == 4
-static struct brw_address
-KSP(struct brw_context *brw, uint32_t offset)
-{
- return ro_bo(brw->cache.bo, offset);
-}
-#else
-static uint32_t
-KSP(UNUSED struct brw_context *brw, uint32_t offset)
-{
- return offset;
-}
-#endif
-
-#if GFX_VER >= 7
-static void
-emit_lrm(struct brw_context *brw, uint32_t reg, struct brw_address addr)
-{
- brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_MEM), lrm) {
- lrm.RegisterAddress = reg;
- lrm.MemoryAddress = addr;
- }
-}
-#endif
-
-#if GFX_VER == 7
-static void
-emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm)
-{
- brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_IMM), lri) {
- lri.RegisterOffset = reg;
- lri.DataDWord = imm;
- }
-}
-#endif
-
-/**
- * Define the base addresses which some state is referenced from.
- *
- * This allows us to avoid having to emit relocations for the objects,
- * and is actually required for binding table pointers on Gfx6.
- *
- * Surface state base address covers binding table pointers and surface state
- * objects, but not the surfaces that the surface state objects point to.
- */
-static void
-genX(emit_state_base_address)(struct brw_context *brw)
-{
- if (brw->batch.state_base_address_emitted)
- return;
-
- /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
- * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
- * programmed prior to STATE_BASE_ADDRESS.
- *
- * However, given that the instruction SBA (general state base
- * address) on this chipset is always set to 0 across X and GL,
- * maybe this isn't required for us in particular.
- */
-
- UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
-
- /* Flush before updating STATE_BASE_ADDRESS */
-#if GFX_VER >= 6
- const unsigned dc_flush =
- GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
-
- /* Emit a render target cache flush.
- *
- * This isn't documented anywhere in the PRM. However, it seems to be
- * necessary prior to changing the surface state base address. We've
- * seen issues in Vulkan where we get GPU hangs when using multi-level
- * command buffers which clear depth, reset state base address, and then
- * go render stuff.
- *
- * Normally, in GL, we would trust the kernel to do sufficient stalls
- * and flushes prior to executing our batch. However, it doesn't seem
- * as if the kernel's flushing is always sufficient and we don't want to
- * rely on it.
- *
- * We make this an end-of-pipe sync instead of a normal flush because we
- * do not know the current status of the GPU. On Haswell at least,
- * having a fast-clear operation in flight at the same time as a normal
- * rendering operation can cause hangs. Since the kernel's flushing is
- * insufficient, we need to ensure that any rendering operations from
- * other processes are definitely complete before we try to do our own
- * rendering. It's a bit of a big hammer but it appears to work.
- */
- brw_emit_end_of_pipe_sync(brw,
- PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- dc_flush);
-#endif
-
- brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) {
- /* Set base addresses */
- sba.GeneralStateBaseAddressModifyEnable = true;
-
-#if GFX_VER >= 6
- sba.DynamicStateBaseAddressModifyEnable = true;
- sba.DynamicStateBaseAddress = ro_bo(brw->batch.state.bo, 0);
-#endif
-
- sba.SurfaceStateBaseAddressModifyEnable = true;
- sba.SurfaceStateBaseAddress = ro_bo(brw->batch.state.bo, 0);
-
- sba.IndirectObjectBaseAddressModifyEnable = true;
-
-#if GFX_VER >= 5
- sba.InstructionBaseAddressModifyEnable = true;
- sba.InstructionBaseAddress = ro_bo(brw->cache.bo, 0);
-#endif
-
- /* Set buffer sizes on Gfx8+ or upper bounds on Gfx4-7 */
-#if GFX_VER >= 8
- sba.GeneralStateBufferSize = 0xfffff;
- sba.IndirectObjectBufferSize = 0xfffff;
- sba.InstructionBufferSize = 0xfffff;
- sba.DynamicStateBufferSize = MAX_STATE_SIZE;
-
- sba.GeneralStateBufferSizeModifyEnable = true;
- sba.DynamicStateBufferSizeModifyEnable = true;
- sba.IndirectObjectBufferSizeModifyEnable = true;
- sba.InstructionBuffersizeModifyEnable = true;
-#else
- sba.GeneralStateAccessUpperBoundModifyEnable = true;
- sba.IndirectObjectAccessUpperBoundModifyEnable = true;
-
-#if GFX_VER >= 5
- sba.InstructionAccessUpperBoundModifyEnable = true;
-#endif
-
-#if GFX_VER >= 6
- /* Dynamic state upper bound. Although the documentation says that
- * programming it to zero will cause it to be ignored, that is a lie.
- * If this isn't programmed to a real bound, the sampler border color
- * pointer is rejected, causing border color to mysteriously fail.
- */
- sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
- sba.DynamicStateAccessUpperBoundModifyEnable = true;
-#else
- /* Same idea but using General State Base Address on Gfx4-5 */
- sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
-#endif
-#endif
-
-#if GFX_VER >= 6
- /* The hardware appears to pay attention to the MOCS fields even
- * if you don't set the "Address Modify Enable" bit for the base.
- */
- sba.GeneralStateMOCS = mocs;
- sba.StatelessDataPortAccessMOCS = mocs;
- sba.DynamicStateMOCS = mocs;
- sba.IndirectObjectMOCS = mocs;
- sba.InstructionMOCS = mocs;
- sba.SurfaceStateMOCS = mocs;
-#endif
-#if GFX_VER >= 9
- sba.BindlessSurfaceStateMOCS = mocs;
-#endif
-#if GFX_VER >= 11
- sba.BindlessSamplerStateMOCS = mocs;
-#endif
- }
-
- /* Flush after updating STATE_BASE_ADDRESS */
-#if GFX_VER >= 6
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_STATE_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
-#endif
-
- /* According to section 3.6.1 of VOL1 of the 965 PRM,
- * STATE_BASE_ADDRESS updates require a reissue of:
- *
- * 3DSTATE_PIPELINE_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * MEDIA_STATE_POINTERS
- *
- * and this continues through Ironlake. The Sandy Bridge PRM, vol
- * 1 part 1 says that the following packets must be reissued:
- *
- * 3DSTATE_CC_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * 3DSTATE_SAMPLER_STATE_POINTERS
- * 3DSTATE_VIEWPORT_STATE_POINTERS
- * MEDIA_STATE_POINTERS
- *
- * Those are always reissued following SBA updates anyway (new
- * batch time), except in the case of the program cache BO
- * changing. Having a separate state flag makes the sequence more
- * obvious.
- */
- brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
- brw->batch.state_base_address_emitted = true;
-}
-
-/**
- * Polygon stipple packet
- */
-static void
-genX(upload_polygon_stipple)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_POLYGON */
- if (!ctx->Polygon.StippleFlag)
- return;
-
- brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
- /* Polygon stipple is provided in OpenGL order, i.e. bottom
- * row first. If we're rendering to a window (i.e. the
- * default frame buffer object, 0), then we need to invert
- * it to match our pixel layout. But if we're rendering
- * to a FBO (i.e. any named frame buffer object), we *don't*
- * need to invert - we already match the layout.
- */
- if (ctx->DrawBuffer->FlipY) {
- for (unsigned i = 0; i < 32; i++)
- poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */
- } else {
- for (unsigned i = 0; i < 32; i++)
- poly.PatternRow[i] = ctx->PolygonStipple[i];
- }
- }
-}
-
-static const struct brw_tracked_state genX(polygon_stipple) = {
- .dirty = {
- .mesa = _NEW_POLYGON |
- _NEW_POLYGONSTIPPLE,
- .brw = BRW_NEW_CONTEXT,
- },
- .emit = genX(upload_polygon_stipple),
-};
-
-/**
- * Polygon stipple offset packet
- */
-static void
-genX(upload_polygon_stipple_offset)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_POLYGON */
- if (!ctx->Polygon.StippleFlag)
- return;
-
- brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_OFFSET), poly) {
- /* _NEW_BUFFERS
- *
- * If we're drawing to a system window we have to invert the Y axis
- * in order to match the OpenGL pixel coordinate system, and our
- * offset must be matched to the window position. If we're drawing
- * to a user-created FBO then our native pixel coordinate system
- * works just fine, and there's no window system to worry about.
- */
- if (ctx->DrawBuffer->FlipY) {
- poly.PolygonStippleYOffset =
- (32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31;
- }
- }
-}
-
-static const struct brw_tracked_state genX(polygon_stipple_offset) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_POLYGON,
- .brw = BRW_NEW_CONTEXT,
- },
- .emit = genX(upload_polygon_stipple_offset),
-};
-
-/**
- * Line stipple packet
- */
-static void
-genX(upload_line_stipple)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- if (!ctx->Line.StippleFlag)
- return;
-
- brw_batch_emit(brw, GENX(3DSTATE_LINE_STIPPLE), line) {
- line.LineStipplePattern = ctx->Line.StipplePattern;
-
- line.LineStippleInverseRepeatCount = 1.0f / ctx->Line.StippleFactor;
- line.LineStippleRepeatCount = ctx->Line.StippleFactor;
- }
-}
-
-static const struct brw_tracked_state genX(line_stipple) = {
- .dirty = {
- .mesa = _NEW_LINE,
- .brw = BRW_NEW_CONTEXT,
- },
- .emit = genX(upload_line_stipple),
-};
-
-/* Constant single cliprect for framebuffer object or DRI2 drawing */
-static void
-genX(upload_drawing_rect)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
- const unsigned int fb_width = _mesa_geometric_width(fb);
- const unsigned int fb_height = _mesa_geometric_height(fb);
-
- brw_batch_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
- rect.ClippedDrawingRectangleXMax = fb_width - 1;
- rect.ClippedDrawingRectangleYMax = fb_height - 1;
- }
-}
-
-static const struct brw_tracked_state genX(drawing_rect) = {
- .dirty = {
- .mesa = _NEW_BUFFERS,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT,
- },
- .emit = genX(upload_drawing_rect),
-};
-
-static uint32_t *
-genX(emit_vertex_buffer_state)(struct brw_context *brw,
- uint32_t *dw,
- unsigned buffer_nr,
- struct brw_bo *bo,
- unsigned start_offset,
- UNUSED unsigned end_offset,
- unsigned stride,
- UNUSED unsigned step_rate)
-{
- struct GENX(VERTEX_BUFFER_STATE) buf_state = {
- .VertexBufferIndex = buffer_nr,
- .BufferPitch = stride,
-
- /* The VF cache designers apparently cut corners, and made the cache
- * only consider the bottom 32 bits of memory addresses. If you happen
- * to have two vertex buffers which get placed exactly 4 GiB apart and
- * use them in back-to-back draw calls, you can get collisions. To work
- * around this problem, we restrict vertex buffers to the low 32 bits of
- * the address space.
- */
- .BufferStartingAddress = ro_32_bo(bo, start_offset),
-#if GFX_VER >= 8
- .BufferSize = end_offset - start_offset,
-#endif
-
-#if GFX_VER >= 7
- .AddressModifyEnable = true,
-#endif
-
-#if GFX_VER >= 6
- .MOCS = brw_mocs(&brw->isl_dev, bo),
-#endif
-
-#if GFX_VER < 8
- .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA,
- .InstanceDataStepRate = step_rate,
-#if GFX_VER >= 5
- .EndAddress = ro_bo(bo, end_offset - 1),
-#endif
-#endif
- };
-
- GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state);
- return dw + GENX(VERTEX_BUFFER_STATE_length);
-}
-
-UNUSED static bool
-is_passthru_format(uint32_t format)
-{
- switch (format) {
- case ISL_FORMAT_R64_PASSTHRU:
- case ISL_FORMAT_R64G64_PASSTHRU:
- case ISL_FORMAT_R64G64B64_PASSTHRU:
- case ISL_FORMAT_R64G64B64A64_PASSTHRU:
- return true;
- default:
- return false;
- }
-}
-
-UNUSED static int
-uploads_needed(uint32_t format,
- bool is_dual_slot)
-{
- if (!is_passthru_format(format))
- return 1;
-
- if (is_dual_slot)
- return 2;
-
- switch (format) {
- case ISL_FORMAT_R64_PASSTHRU:
- case ISL_FORMAT_R64G64_PASSTHRU:
- return 1;
- case ISL_FORMAT_R64G64B64_PASSTHRU:
- case ISL_FORMAT_R64G64B64A64_PASSTHRU:
- return 2;
- default:
- unreachable("not reached");
- }
-}
-
-/*
- * Returns the format that we are finally going to use when uploading a vertex
- * element. It will only change if we are using *64*PASSTHRU formats, as for
- * gen < 8 they need to be split into two *32*FLOAT formats.
- *
- * @upload indicates which upload we are in. Valid values are [0,1]
- */
-static uint32_t
-downsize_format_if_needed(uint32_t format,
- int upload)
-{
- assert(upload == 0 || upload == 1);
-
- if (!is_passthru_format(format))
- return format;
-
- /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload ==
- * 1 means that we have been forced to do 2 uploads for a size <= 2. This
- * happens with gen < 8 and dvec3 or dvec4 vertex shader input
- * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of
- * flagging that we want to fill with zeroes this second forced upload.
- */
- switch (format) {
- case ISL_FORMAT_R64_PASSTHRU:
- return upload == 0 ? ISL_FORMAT_R32G32_FLOAT
- : ISL_FORMAT_R32_FLOAT;
- case ISL_FORMAT_R64G64_PASSTHRU:
- return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
- : ISL_FORMAT_R32_FLOAT;
- case ISL_FORMAT_R64G64B64_PASSTHRU:
- return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
- : ISL_FORMAT_R32G32_FLOAT;
- case ISL_FORMAT_R64G64B64A64_PASSTHRU:
- return ISL_FORMAT_R32G32B32A32_FLOAT;
- default:
- unreachable("not reached");
- }
-}
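
Editor's note (illustrative fragment, assuming the helpers and ISL format enums above are in scope): for a dvec3 vertex input on gen < 8 the split works out as follows, reading the values straight off the switch statements above.

   /* dvec3, i.e. ISL_FORMAT_R64G64B64_PASSTHRU, on gen < 8: */
   assert(uploads_needed(ISL_FORMAT_R64G64B64_PASSTHRU, false) == 2);
   assert(downsize_format_if_needed(ISL_FORMAT_R64G64B64_PASSTHRU, 0) ==
          ISL_FORMAT_R32G32B32A32_FLOAT);    /* doubles x and y */
   assert(downsize_format_if_needed(ISL_FORMAT_R64G64B64_PASSTHRU, 1) ==
          ISL_FORMAT_R32G32_FLOAT);          /* double z */
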
-
-/*
- * Returns the number of components associated with a format that is used on
- * a 64 to 32 format split. See downsize_format_if_needed().
- */
-static int
-upload_format_size(uint32_t upload_format)
-{
- switch (upload_format) {
- case ISL_FORMAT_R32_FLOAT:
-
- * downsize_format_if_needed() returned this one in order to flag that we are
- * performing a second upload which we want to have filled with
- * zeroes. This happens with gen < 8, a size <= 2, and dvec3 or dvec4
- * vertex shader input variables.
- */
-
- return 0;
- case ISL_FORMAT_R32G32_FLOAT:
- return 2;
- case ISL_FORMAT_R32G32B32A32_FLOAT:
- return 4;
- default:
- unreachable("not reached");
- }
-}
-
-static UNUSED uint16_t
-pinned_bo_high_bits(struct brw_bo *bo)
-{
- return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
-}
-
-/* The VF cache designers apparently cut corners, and made the cache key's
- * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
- * of the address. If you happen to have two vertex buffers which get placed
- * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
- * collisions. (These collisions can happen within a single batch.)
- *
- * In the soft-pin world, we'd like to assign addresses up front, and never
- * move buffers. So, we need to do a VF cache invalidate if the buffer for
- * a particular VB slot has different [48:32] address bits than the last one.
- *
- * In the relocation world, we have no idea what the addresses will be, so
- * we can't apply this workaround. Instead, we tell the kernel to move it
- * to the low 4GB regardless.
- *
- * This HW issue is gone on Gfx11+.
- */
-static void
-vf_invalidate_for_vb_48bit_transitions(UNUSED struct brw_context *brw)
-{
-#if GFX_VER >= 8 && GFX_VER < 11
- bool need_invalidate = false;
-
- for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
- uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
-
- if (high_bits != brw->vb.last_bo_high_bits[i]) {
- need_invalidate = true;
- brw->vb.last_bo_high_bits[i] = high_bits;
- }
- }
-
- if (brw->draw.draw_params_bo) {
- uint16_t high_bits = pinned_bo_high_bits(brw->draw.draw_params_bo);
-
- if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers] != high_bits) {
- need_invalidate = true;
- brw->vb.last_bo_high_bits[brw->vb.nr_buffers] = high_bits;
- }
- }
-
- if (brw->draw.derived_draw_params_bo) {
- uint16_t high_bits = pinned_bo_high_bits(brw->draw.derived_draw_params_bo);
-
- if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] != high_bits) {
- need_invalidate = true;
- brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] = high_bits;
- }
- }
-
- if (need_invalidate) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL);
- }
-#endif
-}
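
Editor's note (illustrative sketch, not part of the removed file; the softpin EXEC_OBJECT_PINNED check is omitted): the workaround above boils down to comparing bits [47:32] of each buffer's GPU address against the value last seen for that slot. A self-contained version of that comparison could be:

   #include <stdbool.h>
   #include <stdint.h>

   /* Returns true if address bits [47:32] differ from the cached value for
    * this slot, updating the cache; a 'true' result is what would trigger
    * the PIPE_CONTROL_VF_CACHE_INVALIDATE above.
    */
   static bool
   vb_high_bits_changed(uint64_t new_address, uint16_t *last_high_bits)
   {
      uint16_t high_bits = (uint16_t)(new_address >> 32);

      if (high_bits != *last_high_bits) {
         *last_high_bits = high_bits;
         return true;
      }
      return false;
   }
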
-
-static void
-vf_invalidate_for_ib_48bit_transition(UNUSED struct brw_context *brw)
-{
-#if GFX_VER >= 8
- uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
-
- if (high_bits != brw->ib.last_bo_high_bits) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
- brw->ib.last_bo_high_bits = high_bits;
- }
-#endif
-}
-
-static void
-genX(emit_vertices)(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t *dw;
-
- brw_prepare_vertices(brw);
- brw_prepare_shader_draw_parameters(brw);
-
-#if GFX_VER < 6
- brw_emit_query_begin(brw);
-#endif
-
- const struct brw_vs_prog_data *vs_prog_data =
- brw_vs_prog_data(brw->vs.base.prog_data);
-
-#if GFX_VER >= 8
- struct gl_context *ctx = &brw->ctx;
- const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL);
-
- if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
- unsigned vue = brw->vb.nr_enabled;
-
- /* The element for the edge flags must always be last, so we have to
- * insert the SGVS before it in that case.
- */
- if (uses_edge_flag) {
- assert(vue > 0);
- vue--;
- }
-
- WARN_ONCE(vue >= 33,
- "Trying to insert VID/IID past 33rd vertex element, "
- "need to reorder the vertex attrbutes.");
-
- brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) {
- if (vs_prog_data->uses_vertexid) {
- vfs.VertexIDEnable = true;
- vfs.VertexIDComponentNumber = 2;
- vfs.VertexIDElementOffset = vue;
- }
-
- if (vs_prog_data->uses_instanceid) {
- vfs.InstanceIDEnable = true;
- vfs.InstanceIDComponentNumber = 3;
- vfs.InstanceIDElementOffset = vue;
- }
- }
-
- brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
- vfi.InstancingEnable = true;
- vfi.VertexElementIndex = vue;
- }
- } else {
- brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
- }
-#endif
-
- const bool uses_draw_params =
- vs_prog_data->uses_firstvertex ||
- vs_prog_data->uses_baseinstance;
-
- const bool uses_derived_draw_params =
- vs_prog_data->uses_drawid ||
- vs_prog_data->uses_is_indexed_draw;
-
- const bool needs_sgvs_element = (uses_draw_params ||
- vs_prog_data->uses_instanceid ||
- vs_prog_data->uses_vertexid);
-
- unsigned nr_elements =
- brw->vb.nr_enabled + needs_sgvs_element + uses_derived_draw_params;
-
-#if GFX_VER < 8
- /* If any of the formats of vb.enabled needs more than one upload, we need
- * to add it to nr_elements
- */
- for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
- struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glformat);
-
- if (uploads_needed(format, input->is_dual_slot) > 1)
- nr_elements++;
- }
-#endif
-
- /* If the VS doesn't read any inputs (calculating vertex position from
- * a state variable for some reason, for example), emit a single pad
- * VERTEX_ELEMENT struct and bail.
- *
- * The stale VB state stays in place, but those buffers don't do anything
- * unless a VE loads from them.
- */
- if (nr_elements == 0) {
- dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
- 1 + GENX(VERTEX_ELEMENT_STATE_length));
- struct GENX(VERTEX_ELEMENT_STATE) elem = {
- .Valid = true,
- .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
- .Component0Control = VFCOMP_STORE_0,
- .Component1Control = VFCOMP_STORE_0,
- .Component2Control = VFCOMP_STORE_0,
- .Component3Control = VFCOMP_STORE_1_FP,
- };
- GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem);
- return;
- }
-
- /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
- const unsigned nr_buffers = brw->vb.nr_buffers +
- uses_draw_params + uses_derived_draw_params;
-
- vf_invalidate_for_vb_48bit_transitions(brw);
-
- if (nr_buffers) {
- assert(nr_buffers <= (GFX_VER >= 6 ? 33 : 17));
-
- dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
- 1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers);
-
- for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
- const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
- /* Prior to Haswell and Bay Trail we have to use 4-component formats
- * to fake 3-component ones. In particular, we do this for
- * half-float and 8 and 16-bit integer formats. This means that the
- * vertex element may poke over the end of the buffer by 2 bytes.
- */
- const unsigned padding =
- (GFX_VERx10 < 75 && devinfo->platform != INTEL_PLATFORM_BYT) * 2;
- const unsigned end = buffer->offset + buffer->size + padding;
- dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
- buffer->offset,
- end,
- buffer->stride,
- buffer->step_rate);
- }
-
- if (uses_draw_params) {
- dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers,
- brw->draw.draw_params_bo,
- brw->draw.draw_params_offset,
- brw->draw.draw_params_bo->size,
- 0 /* stride */,
- 0 /* step rate */);
- }
-
- if (uses_derived_draw_params) {
- dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
- brw->draw.derived_draw_params_bo,
- brw->draw.derived_draw_params_offset,
- brw->draw.derived_draw_params_bo->size,
- 0 /* stride */,
- 0 /* step rate */);
- }
- }
-
- /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
- * presumably for VertexID/InstanceID.
- */
-#if GFX_VER >= 6
- assert(nr_elements <= 34);
- const struct brw_vertex_element *gfx6_edgeflag_input = NULL;
-#else
- assert(nr_elements <= 18);
-#endif
-
- dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
- 1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
- unsigned i;
- for (i = 0; i < brw->vb.nr_enabled; i++) {
- const struct brw_vertex_element *input = brw->vb.enabled[i];
- const struct gl_vertex_format *glformat = input->glformat;
- uint32_t format = brw_get_vertex_surface_type(brw, glformat);
- uint32_t comp0 = VFCOMP_STORE_SRC;
- uint32_t comp1 = VFCOMP_STORE_SRC;
- uint32_t comp2 = VFCOMP_STORE_SRC;
- uint32_t comp3 = VFCOMP_STORE_SRC;
- const unsigned num_uploads = GFX_VER < 8 ?
- uploads_needed(format, input->is_dual_slot) : 1;
-
-#if GFX_VER >= 8
- /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
- * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
- * element which has edge flag enabled."
- */
- assert(!(is_passthru_format(format) && uses_edge_flag));
-#endif
-
- /* The gfx4 driver expects edgeflag to come in as a float, and passes
- * that float on to the tests in the clipper. Mesa's current vertex
- * attribute value for EdgeFlag is stored as a float, which works out.
- * glEdgeFlagPointer, on the other hand, gives us an unnormalized
- * integer ubyte. Just rewrite that to convert to a float.
- *
- * Gfx6+ passes edgeflag as sideband along with the vertex, instead
- * of in the VUE. We have to upload it sideband as the last vertex
- * element according to the B-Spec.
- */
-#if GFX_VER >= 6
- if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
- gfx6_edgeflag_input = input;
- continue;
- }
-#endif
-
- for (unsigned c = 0; c < num_uploads; c++) {
- const uint32_t upload_format = GFX_VER >= 8 ? format :
- downsize_format_if_needed(format, c);
- /* If we need more than one upload, the offset stride would be 128
- * bits (16 bytes), as for previous uploads we are using the full
- * entry. */
- const unsigned offset = input->offset + c * 16;
-
- const int size = (GFX_VER < 8 && is_passthru_format(format)) ?
- upload_format_size(upload_format) : glformat->Size;
-
- switch (size) {
- case 0: comp0 = VFCOMP_STORE_0; FALLTHROUGH;
- case 1: comp1 = VFCOMP_STORE_0; FALLTHROUGH;
- case 2: comp2 = VFCOMP_STORE_0; FALLTHROUGH;
- case 3:
- if (GFX_VER >= 8 && glformat->Doubles) {
- comp3 = VFCOMP_STORE_0;
- } else if (glformat->Integer) {
- comp3 = VFCOMP_STORE_1_INT;
- } else {
- comp3 = VFCOMP_STORE_1_FP;
- }
-
- break;
- }
-
-#if GFX_VER >= 8
- /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE):
- *
- * "When SourceElementFormat is set to one of the *64*_PASSTHRU
- * formats, 64-bit components are stored in the URB without any
- * conversion. In this case, vertex elements must be written as 128
- * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as
- * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red
- * component into the URB, Component 1 must be specified as
- * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in
- * order to output a 128-bit vertex element, or Components 1-3 must
- * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
- * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3
- * to be specified as VFCOMP_STORE_0 in order to output a 256-bit
- * vertex element."
- */
- if (glformat->Doubles && !input->is_dual_slot) {
- /* Store vertex elements which correspond to double and dvec2 vertex
- * shader inputs as 128-bit vertex elements, instead of 256-bits.
- */
- comp2 = VFCOMP_NOSTORE;
- comp3 = VFCOMP_NOSTORE;
- }
-#endif
-
- struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
- .VertexBufferIndex = input->buffer,
- .Valid = true,
- .SourceElementFormat = upload_format,
- .SourceElementOffset = offset,
- .Component0Control = comp0,
- .Component1Control = comp1,
- .Component2Control = comp2,
- .Component3Control = comp3,
-#if GFX_VER < 5
- .DestinationElementOffset = i * 4,
-#endif
- };
-
- GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
- dw += GENX(VERTEX_ELEMENT_STATE_length);
- }
- }
-
- if (needs_sgvs_element) {
- struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
- .Valid = true,
- .Component0Control = VFCOMP_STORE_0,
- .Component1Control = VFCOMP_STORE_0,
- .Component2Control = VFCOMP_STORE_0,
- .Component3Control = VFCOMP_STORE_0,
-#if GFX_VER < 5
- .DestinationElementOffset = i * 4,
-#endif
- };
-
-#if GFX_VER >= 8
- if (uses_draw_params) {
- elem_state.VertexBufferIndex = brw->vb.nr_buffers;
- elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
- elem_state.Component0Control = VFCOMP_STORE_SRC;
- elem_state.Component1Control = VFCOMP_STORE_SRC;
- }
-#else
- elem_state.VertexBufferIndex = brw->vb.nr_buffers;
- elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
- if (uses_draw_params) {
- elem_state.Component0Control = VFCOMP_STORE_SRC;
- elem_state.Component1Control = VFCOMP_STORE_SRC;
- }
-
- if (vs_prog_data->uses_vertexid)
- elem_state.Component2Control = VFCOMP_STORE_VID;
-
- if (vs_prog_data->uses_instanceid)
- elem_state.Component3Control = VFCOMP_STORE_IID;
-#endif
-
- GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
- dw += GENX(VERTEX_ELEMENT_STATE_length);
- }
-
- if (uses_derived_draw_params) {
- struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
- .Valid = true,
- .VertexBufferIndex = brw->vb.nr_buffers + 1,
- .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
- .Component0Control = VFCOMP_STORE_SRC,
- .Component1Control = VFCOMP_STORE_SRC,
- .Component2Control = VFCOMP_STORE_0,
- .Component3Control = VFCOMP_STORE_0,
-#if GFX_VER < 5
- .DestinationElementOffset = i * 4,
-#endif
- };
-
- GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
- dw += GENX(VERTEX_ELEMENT_STATE_length);
- }
-
-#if GFX_VER >= 6
- if (gfx6_edgeflag_input) {
- const struct gl_vertex_format *glformat = gfx6_edgeflag_input->glformat;
- const uint32_t format = brw_get_vertex_surface_type(brw, glformat);
-
- struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
- .Valid = true,
- .VertexBufferIndex = gfx6_edgeflag_input->buffer,
- .EdgeFlagEnable = true,
- .SourceElementFormat = format,
- .SourceElementOffset = gfx6_edgeflag_input->offset,
- .Component0Control = VFCOMP_STORE_SRC,
- .Component1Control = VFCOMP_STORE_0,
- .Component2Control = VFCOMP_STORE_0,
- .Component3Control = VFCOMP_STORE_0,
- };
-
- GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
- dw += GENX(VERTEX_ELEMENT_STATE_length);
- }
-#endif
-
-#if GFX_VER >= 8
- for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
- const struct brw_vertex_element *input = brw->vb.enabled[i];
- const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
- unsigned element_index;
-
- /* The edge flag element is reordered to be the last one in the code
- * above so we need to compensate for that in the element indices used
- * below.
- */
- if (input == gfx6_edgeflag_input)
- element_index = nr_elements - 1;
- else
- element_index = j++;
-
- brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
- vfi.VertexElementIndex = element_index;
- vfi.InstancingEnable = buffer->step_rate != 0;
- vfi.InstanceDataStepRate = buffer->step_rate;
- }
- }
-
- if (vs_prog_data->uses_drawid) {
- const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
-
- brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
- vfi.VertexElementIndex = element;
- }
- }
-#endif
-}
-
-static const struct brw_tracked_state genX(vertices) = {
- .dirty = {
- .mesa = _NEW_POLYGON,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VERTICES |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = genX(emit_vertices),
-};
-
-static void
-genX(emit_index_buffer)(struct brw_context *brw)
-{
- const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-
- if (index_buffer == NULL)
- return;
-
- vf_invalidate_for_ib_48bit_transition(brw);
-
- brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
-#if GFX_VERx10 < 75
- assert(brw->ib.enable_cut_index == brw->prim_restart.enable_cut_index);
- ib.CutIndexEnable = brw->ib.enable_cut_index;
-#endif
- ib.IndexFormat = brw_get_index_type(1 << index_buffer->index_size_shift);
-
-#if GFX_VER >= 6
- ib.MOCS = brw_mocs(&brw->isl_dev, brw->ib.bo);
-#endif
-
- /* The VF cache designers apparently cut corners, and made the cache
- * only consider the bottom 32 bits of memory addresses. If you happen
- * to have two index buffers which get placed exactly 4 GiB apart and
- * use them in back-to-back draw calls, you can get collisions. To work
- * around this problem, we restrict index buffers to the low 32 bits of
- * the address space.
- */
- ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0);
-#if GFX_VER >= 8
- ib.BufferSize = brw->ib.size;
-#else
- ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1);
-#endif
- }
-}
-
-static const struct brw_tracked_state genX(index_buffer) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_INDEX_BUFFER,
- },
- .emit = genX(emit_index_buffer),
-};
-
-#if GFX_VERx10 >= 75
-static void
-genX(upload_cut_index)(struct brw_context *brw)
-{
- brw_batch_emit(brw, GENX(3DSTATE_VF), vf) {
- if (brw->prim_restart.enable_cut_index && brw->ib.ib) {
- vf.IndexedDrawCutIndexEnable = true;
- vf.CutIndex = brw->prim_restart.restart_index;
- }
- }
-}
-
-const struct brw_tracked_state genX(cut_index) = {
- .dirty = {
- .mesa = _NEW_TRANSFORM,
- .brw = BRW_NEW_INDEX_BUFFER,
- },
- .emit = genX(upload_cut_index),
-};
-#endif
-
-static void
-genX(upload_vf_statistics)(struct brw_context *brw)
-{
- brw_batch_emit(brw, GENX(3DSTATE_VF_STATISTICS), vf) {
- vf.StatisticsEnable = true;
- }
-}
-
-const struct brw_tracked_state genX(vf_statistics) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP | BRW_NEW_CONTEXT,
- },
- .emit = genX(upload_vf_statistics),
-};
-
-#if GFX_VER >= 6
-/**
- * Determine the appropriate attribute override value to store into the
- * 3DSTATE_SF structure for a given fragment shader attribute. The attribute
- * override value contains two pieces of information: the location of the
- * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
- * flag indicating whether to "swizzle" the attribute based on the direction
- * the triangle is facing.
- *
- * If an attribute is "swizzled", then the given VUE location is used for
- * front-facing triangles, and the VUE location that immediately follows is
- * used for back-facing triangles. We use this to implement the mapping from
- * gl_FrontColor/gl_BackColor to gl_Color.
- *
- * urb_entry_read_offset is the offset into the VUE at which the SF unit is
- * being instructed to begin reading attribute data. It can be set to a
- * nonzero value to prevent the SF unit from wasting time reading elements of
- * the VUE that are not needed by the fragment shader. It is measured in
- * 256-bit increments.
- */
-static void
-genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
- const struct brw_vue_map *vue_map,
- int urb_entry_read_offset, int fs_attr,
- bool two_side_color, uint32_t *max_source_attr)
-{
- /* Find the VUE slot for this attribute. */
- int slot = vue_map->varying_to_slot[fs_attr];
-
- /* Viewport and Layer are stored in the VUE header. We need to override
- * them to zero if earlier stages didn't write them, as GL requires that
- * they read back as zero when not explicitly set.
- */
- if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
- attr->ComponentOverrideX = true;
- attr->ComponentOverrideW = true;
- attr->ConstantSource = CONST_0000;
-
- if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
- attr->ComponentOverrideY = true;
- if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
- attr->ComponentOverrideZ = true;
-
- return;
- }
-
- /* If there was only a back color written but not front, use back
- * as the color instead of undefined
- */
- if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
- slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
- if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
- slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
-
- if (slot == -1) {
- /* This attribute does not exist in the VUE--that means that the vertex
- * shader did not write to it. This means that either:
- *
- * (a) This attribute is a texture coordinate, and it is going to be
- * replaced with point coordinates (as a consequence of a call to
- * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
- * hardware will ignore whatever attribute override we supply.
- *
- * (b) This attribute is read by the fragment shader but not written by
- * the vertex shader, so its value is undefined. Therefore the
- * attribute override we supply doesn't matter.
- *
- * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
- * previous shader stage.
- *
- * Note that we don't have to worry about the cases where the attribute
- * is gl_PointCoord or is undergoing point sprite coordinate
- * replacement, because in those cases, this function isn't called.
- *
- * In case (c), we need to program the attribute overrides so that the
- * primitive ID will be stored in this slot. In every other case, the
- * attribute override we supply doesn't matter. So just go ahead and
- * program primitive ID in every case.
- */
- attr->ComponentOverrideW = true;
- attr->ComponentOverrideX = true;
- attr->ComponentOverrideY = true;
- attr->ComponentOverrideZ = true;
- attr->ConstantSource = PRIM_ID;
- return;
- }
-
- /* Compute the location of the attribute relative to urb_entry_read_offset.
- * Each increment of urb_entry_read_offset represents a 256-bit value, so
- * it counts for two 128-bit VUE slots.
- */
- int source_attr = slot - 2 * urb_entry_read_offset;
- assert(source_attr >= 0 && source_attr < 32);
-
- /* If we are doing two-sided color, and the VUE slot following this one
- * represents a back-facing color, then we need to instruct the SF unit to
- * do back-facing swizzling.
- */
- bool swizzling = two_side_color &&
- ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
- vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
- (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
- vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
-
- /* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
- if (*max_source_attr < source_attr + swizzling)
- *max_source_attr = source_attr + swizzling;
-
- attr->SourceAttribute = source_attr;
- if (swizzling)
- attr->SwizzleSelect = INPUTATTR_FACING;
-}
-
-
-static void
-genX(calculate_attr_overrides)(const struct brw_context *brw,
- struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
- uint32_t *point_sprite_enables,
- uint32_t *urb_entry_read_length,
- uint32_t *urb_entry_read_offset)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_POINT */
- const struct gl_point_attrib *point = &ctx->Point;
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
- uint32_t max_source_attr = 0;
-
- *point_sprite_enables = 0;
-
- int first_slot =
- brw_compute_first_urb_slot_required(fp->info.inputs_read,
- &brw->vue_map_geom_out);
-
- /* Each URB offset packs two varying slots */
- assert(first_slot % 2 == 0);
- *urb_entry_read_offset = first_slot / 2;
-
- /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
- * description of dw10 Point Sprite Texture Coordinate Enable:
- *
- * "This field must be programmed to zero when non-point primitives
- * are rendered."
- *
- * The SandyBridge PRM doesn't explicitly say that point sprite enables
- * must be programmed to zero when rendering non-point primitives, but
- * the IvyBridge PRM does, and if we don't, we get garbage.
- *
- * This is not required on Haswell, as the hardware ignores this state
- * when drawing non-points -- although we do still need to be careful to
- * correctly set the attr overrides.
- *
- * _NEW_POLYGON
- * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
- */
- bool drawing_points = brw_is_drawing_points(brw);
-
- for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
- uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
- int input_index = wm_prog_data->urb_setup[attr];
-
- assert(0 <= input_index);
-
- /* _NEW_POINT */
- bool point_sprite = false;
- if (drawing_points) {
- if (point->PointSprite &&
- (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
- (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
- point_sprite = true;
- }
-
- if (attr == VARYING_SLOT_PNTC)
- point_sprite = true;
-
- if (point_sprite)
- *point_sprite_enables |= (1 << input_index);
- }
-
- /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
- struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
-
- if (!point_sprite) {
- genX(get_attr_override)(&attribute,
- &brw->vue_map_geom_out,
- *urb_entry_read_offset, attr,
- _mesa_vertex_program_two_side_enabled(ctx),
- &max_source_attr);
- }
-
- /* The hardware can only apply overrides to the first 16 attributes;
- * the remaining (up to 16 more) have to already be lined up so that
- * the input index equals the output index. We'll need to do some
- * tweaking to make sure that's the case.
- */
- if (input_index < 16)
- attr_overrides[input_index] = attribute;
- else
- assert(attribute.SourceAttribute == input_index);
- }
-
- /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
- * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
- *
- * "This field should be set to the minimum length required to read the
- * maximum source attribute. The maximum source attribute is indicated
- * by the maximum value of the enabled Attribute # Source Attribute if
- * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
- * enable is not set.
- * read_length = ceiling((max_source_attr + 1) / 2)
- *
- * [errata] Corruption/Hang possible if length programmed larger than
- * recommended"
- *
- * Similar text exists for Ivy Bridge.
- */
- *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
-}
-#endif
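
Editor's note (illustrative, not from the removed file): the "Vertex URB Entry Read Length" rule quoted above is just a round-up-by-two, since each 256-bit URB read covers two 128-bit VUE slots. A tiny standalone helper makes that concrete:

   /* read_length = DIV_ROUND_UP(max_source_attr + 1, 2) */
   static unsigned
   urb_read_length_for(unsigned max_source_attr)
   {
      return (max_source_attr + 1 + 1) / 2;
   }
   /* e.g. max_source_attr == 4 (five slots in use) -> 3 reads of 256 bits */
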
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;
-#elif GFX_VER >= 6
-typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML;
-#else
-typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML;
-#endif
-
-static inline void
-set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_BUFFERS */
- struct brw_renderbuffer *depth_irb =
- brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-
- /* _NEW_DEPTH */
- struct gl_depthbuffer_attrib *depth = &ctx->Depth;
-
- /* _NEW_STENCIL */
- struct gl_stencil_attrib *stencil = &ctx->Stencil;
- const int b = stencil->_BackFace;
-
- if (depth->Test && depth_irb) {
- ds->DepthTestEnable = true;
- ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
- ds->DepthTestFunction = brw_translate_compare_func(depth->Func);
- }
-
- if (brw->stencil_enabled) {
- ds->StencilTestEnable = true;
- ds->StencilWriteMask = stencil->WriteMask[0] & 0xff;
- ds->StencilTestMask = stencil->ValueMask[0] & 0xff;
-
- ds->StencilTestFunction =
- brw_translate_compare_func(stencil->Function[0]);
- ds->StencilFailOp =
- brw_translate_stencil_op(stencil->FailFunc[0]);
- ds->StencilPassDepthPassOp =
- brw_translate_stencil_op(stencil->ZPassFunc[0]);
- ds->StencilPassDepthFailOp =
- brw_translate_stencil_op(stencil->ZFailFunc[0]);
-
- ds->StencilBufferWriteEnable = brw->stencil_write_enabled;
-
- if (brw->stencil_two_sided) {
- ds->DoubleSidedStencilEnable = true;
- ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
- ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
-
- ds->BackfaceStencilTestFunction =
- brw_translate_compare_func(stencil->Function[b]);
- ds->BackfaceStencilFailOp =
- brw_translate_stencil_op(stencil->FailFunc[b]);
- ds->BackfaceStencilPassDepthPassOp =
- brw_translate_stencil_op(stencil->ZPassFunc[b]);
- ds->BackfaceStencilPassDepthFailOp =
- brw_translate_stencil_op(stencil->ZFailFunc[b]);
- }
-
-#if GFX_VER <= 5 || GFX_VER >= 9
- ds->StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
- ds->BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
-#endif
- }
-}
-
-#if GFX_VER >= 6
-static void
-genX(upload_depth_stencil_state)(struct brw_context *brw)
-{
-#if GFX_VER >= 8
- brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
- set_depth_stencil_bits(brw, &wmds);
- }
-#else
- uint32_t ds_offset;
- brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, ds) {
- set_depth_stencil_bits(brw, &ds);
- }
-
- /* Now upload a pointer to the indirect state */
-#if GFX_VER == 6
- brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
- ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
- ptr.DEPTH_STENCIL_STATEChange = true;
- }
-#else
- brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
- ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
- }
-#endif
-#endif
-}
-
-static const struct brw_tracked_state genX(depth_stencil_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_DEPTH |
- _NEW_STENCIL,
- .brw = BRW_NEW_BLORP |
- (GFX_VER >= 8 ? BRW_NEW_CONTEXT
- : BRW_NEW_BATCH |
- BRW_NEW_STATE_BASE_ADDRESS),
- },
- .emit = genX(upload_depth_stencil_state),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER <= 5
-
-static void
-genX(upload_clip_state)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
- brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) {
- clip.KernelStartPointer = KSP(brw, brw->clip.prog_offset);
- clip.GRFRegisterCount =
- DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1;
- clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
- clip.SingleProgramFlow = true;
- clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length;
- clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length;
-
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2;
- clip.DispatchGRFStartRegisterForURBData = 1;
- clip.VertexURBEntryReadOffset = 0;
-
- /* BRW_NEW_URB_FENCE */
- clip.NumberofURBEntries = brw->urb.nr_clip_entries;
- clip.URBEntryAllocationSize = brw->urb.vsize - 1;
-
- if (brw->urb.nr_clip_entries >= 10) {
- /* Half of the URB entries go to each thread, so the total must be an
- * even number.
- */
- assert(brw->urb.nr_clip_entries % 2 == 0);
-
- /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
- * only 2 threads can output VUEs at a time.
- */
- clip.MaximumNumberofThreads = (GFX_VER == 5 ? 16 : 2) - 1;
- } else {
- assert(brw->urb.nr_clip_entries >= 5);
- clip.MaximumNumberofThreads = 1 - 1;
- }
-
- clip.VertexPositionSpace = VPOS_NDCSPACE;
- clip.UserClipFlagsMustClipEnable = true;
- clip.GuardbandClipTestEnable = true;
-
- clip.ClipperViewportStatePointer =
- ro_bo(brw->batch.state.bo, brw->clip.vp_offset);
-
- clip.ScreenSpaceViewportXMin = -1;
- clip.ScreenSpaceViewportXMax = 1;
- clip.ScreenSpaceViewportYMin = -1;
- clip.ScreenSpaceViewportYMax = 1;
-
- clip.ViewportXYClipTestEnable = true;
- clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear &&
- ctx->Transform.DepthClampFar);
-
- /* _NEW_TRANSFORM */
- if (GFX_VER == 5 || GFX_VERx10 == 45) {
- clip.UserClipDistanceClipTestEnableBitmask =
- ctx->Transform.ClipPlanesEnabled;
- } else {
- /* Up to 6 actual clip flags, plus the 7th for the negative RHW
- * workaround.
- */
- clip.UserClipDistanceClipTestEnableBitmask =
- (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
- }
-
- if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
- clip.APIMode = APIMODE_D3D;
- else
- clip.APIMode = APIMODE_OGL;
-
- clip.GuardbandClipTestEnable = true;
-
- clip.ClipMode = brw->clip.prog_data->clip_mode;
-
-#if GFX_VERx10 == 45
- clip.NegativeWClipTestEnable = true;
-#endif
- }
-}
-
-const struct brw_tracked_state genX(clip_state) = {
- .dirty = {
- .mesa = _NEW_TRANSFORM |
- _NEW_VIEWPORT,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_CLIP_PROG_DATA |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_PROGRAM_CACHE |
- BRW_NEW_URB_FENCE,
- },
- .emit = genX(upload_clip_state),
-};
-
-#else
-
-static void
-genX(upload_clip_state)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_BUFFERS */
- struct gl_framebuffer *fb = ctx->DrawBuffer;
-
- /* BRW_NEW_FS_PROG_DATA */
- struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) {
- clip.StatisticsEnable = !brw->meta_in_progress;
-
- if (wm_prog_data->barycentric_interp_modes &
- BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
- clip.NonPerspectiveBarycentricEnable = true;
-
-#if GFX_VER >= 7
- clip.EarlyCullEnable = true;
-#endif
-
-#if GFX_VER == 7
- clip.FrontWinding = brw->polygon_front_bit != fb->FlipY;
-
- if (ctx->Polygon.CullFlag) {
- switch (ctx->Polygon.CullFaceMode) {
- case GL_FRONT:
- clip.CullMode = CULLMODE_FRONT;
- break;
- case GL_BACK:
- clip.CullMode = CULLMODE_BACK;
- break;
- case GL_FRONT_AND_BACK:
- clip.CullMode = CULLMODE_BOTH;
- break;
- default:
- unreachable("Should not get here: invalid CullFlag");
- }
- } else {
- clip.CullMode = CULLMODE_NONE;
- }
-#endif
-
-#if GFX_VER < 8
- clip.UserClipDistanceCullTestEnableBitmask =
- brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask;
-
- clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear &&
- ctx->Transform.DepthClampFar);
-#endif
-
- /* _NEW_LIGHT */
- if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
- clip.TriangleStripListProvokingVertexSelect = 0;
- clip.TriangleFanProvokingVertexSelect = 1;
- clip.LineStripListProvokingVertexSelect = 0;
- } else {
- clip.TriangleStripListProvokingVertexSelect = 2;
- clip.TriangleFanProvokingVertexSelect = 2;
- clip.LineStripListProvokingVertexSelect = 1;
- }
-
- /* _NEW_TRANSFORM */
- clip.UserClipDistanceClipTestEnableBitmask =
- ctx->Transform.ClipPlanesEnabled;
-
-#if GFX_VER >= 8
- clip.ForceUserClipDistanceClipTestEnableBitmask = true;
-#endif
-
- if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
- clip.APIMode = APIMODE_D3D;
- else
- clip.APIMode = APIMODE_OGL;
-
- clip.GuardbandClipTestEnable = true;
-
- /* BRW_NEW_VIEWPORT_COUNT */
- const unsigned viewport_count = brw->clip.viewport_count;
-
- if (ctx->RasterDiscard) {
- clip.ClipMode = CLIPMODE_REJECT_ALL;
-#if GFX_VER == 6
- perf_debug("Rasterizer discard is currently implemented via the "
- "clipper; having the GS not write primitives would "
- "likely be faster.\n");
-#endif
- } else {
- clip.ClipMode = CLIPMODE_NORMAL;
- }
-
- clip.ClipEnable = true;
-
- /* _NEW_POLYGON,
- * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
- */
- if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw))
- clip.ViewportXYClipTestEnable = true;
-
- clip.MinimumPointWidth = 0.125;
- clip.MaximumPointWidth = 255.875;
- clip.MaximumVPIndex = viewport_count - 1;
- if (_mesa_geometric_layers(fb) == 0)
- clip.ForceZeroRTAIndexEnable = true;
- }
-}
-
-static const struct brw_tracked_state genX(clip_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_LIGHT |
- _NEW_POLYGON |
- _NEW_TRANSFORM,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_VS_PROG_DATA |
- BRW_NEW_META_IN_PROGRESS |
- BRW_NEW_PRIMITIVE |
- BRW_NEW_RASTERIZER_DISCARD |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_VIEWPORT_COUNT,
- },
- .emit = genX(upload_clip_state),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_sf)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- float point_size;
-
-#if GFX_VER <= 7
- /* _NEW_BUFFERS */
- bool flip_y = ctx->DrawBuffer->FlipY;
- UNUSED const bool multisampled_fbo =
- _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-#endif
-
-#if GFX_VER < 6
- const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data;
-
- ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
-
- brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) {
- sf.KernelStartPointer = KSP(brw, brw->sf.prog_offset);
- sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
- sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
- sf.DispatchGRFStartRegisterForURBData = 3;
- sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
- sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
- sf.NumberofURBEntries = brw->urb.nr_sf_entries;
- sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
-
- /* The STATE_PREFETCH command description describes this state as
- * something loaded through the GPE (L2 ISC), so it belongs to the
- * INSTRUCTION domain.
- */
- sf.SetupViewportStateOffset =
- ro_bo(brw->batch.state.bo, brw->sf.vp_offset);
-
- sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
-
- /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */
- /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */
-
- sf.MaximumNumberofThreads =
- MIN2(GFX_VER == 5 ? 48 : 24, brw->urb.nr_sf_entries) - 1;
-
- sf.SpritePointEnable = ctx->Point.PointSprite;
-
- sf.DestinationOriginHorizontalBias = 0.5;
- sf.DestinationOriginVerticalBias = 0.5;
-#else
- brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
- sf.StatisticsEnable = true;
-#endif
- sf.ViewportTransformEnable = true;
-
-#if GFX_VER == 7
- /* _NEW_BUFFERS */
- sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw);
-#endif
-
-#if GFX_VER <= 7
- /* _NEW_POLYGON */
- sf.FrontWinding = brw->polygon_front_bit != flip_y;
-#if GFX_VER >= 6
- sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
- sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
- sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
-
- switch (ctx->Polygon.FrontMode) {
- case GL_FILL:
- sf.FrontFaceFillMode = FILL_MODE_SOLID;
- break;
- case GL_LINE:
- sf.FrontFaceFillMode = FILL_MODE_WIREFRAME;
- break;
- case GL_POINT:
- sf.FrontFaceFillMode = FILL_MODE_POINT;
- break;
- default:
- unreachable("not reached");
- }
-
- switch (ctx->Polygon.BackMode) {
- case GL_FILL:
- sf.BackFaceFillMode = FILL_MODE_SOLID;
- break;
- case GL_LINE:
- sf.BackFaceFillMode = FILL_MODE_WIREFRAME;
- break;
- case GL_POINT:
- sf.BackFaceFillMode = FILL_MODE_POINT;
- break;
- default:
- unreachable("not reached");
- }
-
- if (multisampled_fbo && ctx->Multisample.Enabled)
- sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
-
- sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
- sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
- sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
-#endif
-
- sf.ScissorRectangleEnable = true;
-
- if (ctx->Polygon.CullFlag) {
- switch (ctx->Polygon.CullFaceMode) {
- case GL_FRONT:
- sf.CullMode = CULLMODE_FRONT;
- break;
- case GL_BACK:
- sf.CullMode = CULLMODE_BACK;
- break;
- case GL_FRONT_AND_BACK:
- sf.CullMode = CULLMODE_BOTH;
- break;
- default:
- unreachable("not reached");
- }
- } else {
- sf.CullMode = CULLMODE_NONE;
- }
-
-#if GFX_VERx10 == 75
- sf.LineStippleEnable = ctx->Line.StippleFlag;
-#endif
-
-#endif
-
- /* _NEW_LINE */
-#if GFX_VER == 8
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->platform == INTEL_PLATFORM_CHV)
- sf.CHVLineWidth = brw_get_line_width(brw);
- else
- sf.LineWidth = brw_get_line_width(brw);
-#else
- sf.LineWidth = brw_get_line_width(brw);
-#endif
-
- if (ctx->Line.SmoothFlag) {
- sf.LineEndCapAntialiasingRegionWidth = _10pixels;
-#if GFX_VER <= 7
- sf.AntialiasingEnable = true;
-#endif
- }
-
- /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
- point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
- /* Clamp to the hardware limits */
- sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f);
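- /* (0.125 and 255.875 are the limits of the hardware's fixed-point
- * point-width field, which has 1/8-pixel granularity.)
- */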
-
- /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
- if (use_state_point_size(brw))
- sf.PointWidthSource = State;
-
-#if GFX_VER >= 8
- /* _NEW_POINT | _NEW_MULTISAMPLE */
- if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
- !ctx->Point.PointSprite)
- sf.SmoothPointEnable = true;
-#endif
-
-#if GFX_VER == 10
- /* _NEW_BUFFERS
- * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
- */
- const bool multisampled_fbo =
- _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- if (multisampled_fbo)
- sf.SmoothPointEnable = false;
-#endif
-
-#if GFX_VERx10 >= 45
- sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
-#endif
-
- /* _NEW_LIGHT */
- if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
- sf.TriangleStripListProvokingVertexSelect = 2;
- sf.TriangleFanProvokingVertexSelect = 2;
- sf.LineStripListProvokingVertexSelect = 1;
- } else {
- sf.TriangleFanProvokingVertexSelect = 1;
- }
-
-#if GFX_VER == 6
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- sf.AttributeSwizzleEnable = true;
- sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
-
- /*
- * Window coordinates in an FBO are inverted, which means point
- * sprite origin must be inverted, too.
- */
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) {
- sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
- } else {
- sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
- }
-
- /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
- * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
- */
- uint32_t urb_entry_read_length;
- uint32_t urb_entry_read_offset;
- uint32_t point_sprite_enables;
- genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables,
- &urb_entry_read_length,
- &urb_entry_read_offset);
- sf.VertexURBEntryReadLength = urb_entry_read_length;
- sf.VertexURBEntryReadOffset = urb_entry_read_offset;
- sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
- sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
-#endif
- }
-}
-
-static const struct brw_tracked_state genX(sf_state) = {
- .dirty = {
- .mesa = _NEW_LIGHT |
- _NEW_LINE |
- _NEW_POINT |
- _NEW_PROGRAM |
- (GFX_VER >= 6 ? _NEW_MULTISAMPLE : 0) |
- (GFX_VER <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) |
- (GFX_VER == 10 ? _NEW_BUFFERS : 0),
- .brw = BRW_NEW_BLORP |
- BRW_NEW_VUE_MAP_GEOM_OUT |
- (GFX_VER <= 5 ? BRW_NEW_BATCH |
- BRW_NEW_PROGRAM_CACHE |
- BRW_NEW_SF_PROG_DATA |
- BRW_NEW_SF_VP |
- BRW_NEW_URB_FENCE
- : 0) |
- (GFX_VER >= 6 ? BRW_NEW_CONTEXT : 0) |
- (GFX_VER >= 6 && GFX_VER <= 7 ?
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_PRIMITIVE |
- BRW_NEW_TES_PROG_DATA
- : 0) |
- (GFX_VER == 6 ? BRW_NEW_FS_PROG_DATA |
- BRW_NEW_FRAGMENT_PROGRAM
- : 0),
- },
- .emit = genX(upload_sf),
-};
-
-/* ---------------------------------------------------------------------- */
-
-static bool
-brw_color_buffer_write_enabled(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
- unsigned i;
-
- /* _NEW_BUFFERS */
- for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
- uint64_t outputs_written = fp->info.outputs_written;
-
- /* _NEW_COLOR */
- if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
- outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
- GET_COLORMASK(ctx->Color.ColorMask, i)) {
- return true;
- }
- }
-
- return false;
-}
-
-static void
-genX(upload_wm)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- UNUSED bool writes_depth =
- wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
- UNUSED struct brw_stage_state *stage_state = &brw->wm.base;
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-#if GFX_VER == 6
- /* We can't fold this into gfx6_upload_wm_push_constants(), because
- * according to the SNB PRM, vol 2 part 1 section 7.2.2
- * (3DSTATE_CONSTANT_PS [DevSNB]):
- *
- * "[DevSNB]: This packet must be followed by WM_STATE."
- */
- brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_PS), wmcp) {
- if (wm_prog_data->base.nr_params != 0) {
- wmcp.Buffer0Valid = true;
- /* Pointer to the WM constant buffer. Covered by the set of
- * state flags from gfx6_upload_wm_push_constants.
- */
- wmcp.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
- wmcp.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
- }
- }
-#endif
-
-#if GFX_VER >= 6
- brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
-#else
- ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
- brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
-#endif
-
-#if GFX_VER <= 6
- wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
- wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
- wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;
-#endif
-
-#if GFX_VER == 4
- /* On gfx4, we only have one shader kernel */
- if (brw_wm_state_has_ksp(wm, 0)) {
- assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0);
- wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset);
- wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
- }
-#elif GFX_VER == 5
- /* On gfx5, we have multiple shader kernels but only one GRF start
- * register for all kernels
- */
- wm.KernelStartPointer0 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
- wm.KernelStartPointer1 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
- wm.KernelStartPointer2 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
-
- wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
- wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
- wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
-
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
-
- /* Dispatch GRF Start should be the same for all shaders on gfx5 */
- if (brw_wm_state_has_ksp(wm, 1)) {
- assert(wm_prog_data->base.dispatch_grf_start_reg ==
- brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1));
- }
- if (brw_wm_state_has_ksp(wm, 2)) {
- assert(wm_prog_data->base.dispatch_grf_start_reg ==
- brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2));
- }
-#elif GFX_VER == 6
- /* On gfx6, we have multiple shader kernels and we no longer specify a
- * register count for each one.
- */
- wm.KernelStartPointer0 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
- wm.KernelStartPointer1 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
- wm.KernelStartPointer2 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
-
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
- wm.DispatchGRFStartRegisterForConstantSetupData1 =
- brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
- wm.DispatchGRFStartRegisterForConstantSetupData2 =
- brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
-#endif
-
-#if GFX_VER <= 5
- wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
- wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
- wm.SetupURBEntryReadOffset = 0;
- wm.EarlyDepthTestEnable = true;
-#endif
-
-#if GFX_VER >= 6
- wm.LineAntialiasingRegionWidth = _10pixels;
- wm.LineEndCapAntialiasingRegionWidth = _05pixels;
-
- wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
- wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
-#else
- if (stage_state->sampler_count)
- wm.SamplerStatePointer =
- ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
-
- wm.LineAntialiasingRegionWidth = _05pixels;
- wm.LineEndCapAntialiasingRegionWidth = _10pixels;
-
- /* _NEW_POLYGON */
- if (ctx->Polygon.OffsetFill) {
- wm.GlobalDepthOffsetEnable = true;
- /* Something weird is going on with legacy_global_depth_bias,
- * offset_constant, scaling and MRD. This value passes glean
- * but gives some odd results elsewhere (e.g. the
- * quad-offset-units test).
- */
- wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
-
- /* This is the only value that passes glean:
- */
- wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
- }
-
- wm.DepthCoefficientURBReadOffset = 1;
-#endif
-
- /* BRW_NEW_STATS_WM */
- wm.StatisticsEnable = GFX_VER >= 6 || brw->stats_wm;
-
-#if GFX_VER < 7
- if (wm_prog_data->base.use_alt_mode)
- wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
-
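- /* The SamplerCount field is expressed in multiples of four samplers;
- * on gfx5 the hardware requires it to be programmed as zero.
- */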
- wm.SamplerCount = GFX_VER == 5 ?
- 0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
-
- wm.BindingTableEntryCount =
- wm_prog_data->base.binding_table.size_bytes / 4;
- wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-
-#if GFX_VER == 6
- wm.DualSourceBlendEnable =
- wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
- ctx->Color._BlendUsesDualSrc & 0x1;
- wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
- wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
-
- /* From the SNB PRM, volume 2 part 1, page 281:
- * "If the PS kernel does not need the Position XY Offsets
- * to compute a Position XY value, then this field should be
- * programmed to POSOFFSET_NONE."
- *
- * "SW Recommendation: If the PS kernel needs the Position Offsets
- * to compute a Position XY value, this field should match Position
- * ZW Interpolation Mode to ensure a consistent position.xyzw
- * computation."
- * We only require XY sample offsets. So, this recommendation doesn't
- * look useful at the moment. We might need this in the future.
- */
- if (wm_prog_data->uses_pos_offset)
- wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
- else
- wm.PositionXYOffsetSelect = POSOFFSET_NONE;
-#endif
-
- if (wm_prog_data->base.total_scratch) {
- wm.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0);
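- /* Per-thread scratch space is encoded as a power-of-two size in KB:
- * ffs(size) - 11 maps 1 KB -> 0, 2 KB -> 1, 4 KB -> 2, and so on.
- */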
- wm.PerThreadScratchSpace =
- ffs(stage_state->per_thread_scratch) - 11;
- }
-
- wm.PixelShaderComputedDepth = writes_depth;
-#endif
-
- /* _NEW_LINE */
- wm.LineStippleEnable = ctx->Line.StippleFlag;
-
- /* _NEW_POLYGON */
- wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
-
-#if GFX_VER < 8
-
-#if GFX_VER >= 6
- wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
-
- /* _NEW_BUFFERS */
- const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-
- if (multisampled_fbo) {
- /* _NEW_MULTISAMPLE */
- if (ctx->Multisample.Enabled)
- wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
- else
- wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
-
- if (wm_prog_data->persample_dispatch)
- wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
- else
- wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
- } else {
- wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
- wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
- }
-#endif
- wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
- if (wm_prog_data->uses_kill ||
- _mesa_is_alpha_test_enabled(ctx) ||
- _mesa_is_alpha_to_coverage_enabled(ctx) ||
- (GFX_VER >= 6 && wm_prog_data->uses_omask)) {
- wm.PixelShaderKillsPixel = true;
- }
-
- /* _NEW_BUFFERS | _NEW_COLOR */
- if (brw_color_buffer_write_enabled(brw) || writes_depth ||
- wm.PixelShaderKillsPixel ||
- (GFX_VER >= 6 && wm_prog_data->has_side_effects)) {
- wm.ThreadDispatchEnable = true;
- }
-
-#if GFX_VER >= 7
- wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
- wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
-#endif
-
- /* The "UAV access enable" bits are unnecessary on HSW because they only
- * seem to have an effect on the HW-assisted coherency mechanism which we
- * don't need, and the rasterization-related UAV_ONLY flag and the
- * DISPATCH_ENABLE bit can be set independently of it.
- * Cf. gfx8_upload_ps_extra().
- *
- * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |
- * _NEW_COLOR
- */
-#if GFX_VERx10 == 75
- if (!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
- wm_prog_data->has_side_effects)
- wm.PSUAVonly = ON;
-#endif
-#endif
-
-#if GFX_VER >= 7
- /* BRW_NEW_FS_PROG_DATA */
- if (wm_prog_data->early_fragment_tests)
- wm.EarlyDepthStencilControl = EDSC_PREPS;
- else if (wm_prog_data->has_side_effects)
- wm.EarlyDepthStencilControl = EDSC_PSEXEC;
-#endif
- }
-
-#if GFX_VER <= 5
- if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
- brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
- clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
- }
-
- brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
- }
-#endif
-}
-
-static const struct brw_tracked_state genX(wm_state) = {
- .dirty = {
- .mesa = _NEW_LINE |
- _NEW_POLYGON |
- (GFX_VER < 8 ? _NEW_BUFFERS |
- _NEW_COLOR :
- 0) |
- (GFX_VER == 6 ? _NEW_PROGRAM_CONSTANTS : 0) |
- (GFX_VER < 6 ? _NEW_POLYGONSTIPPLE : 0) |
- (GFX_VER < 8 && GFX_VER >= 6 ? _NEW_MULTISAMPLE : 0),
- .brw = BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- (GFX_VER < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_PROGRAM_CACHE |
- BRW_NEW_SAMPLER_STATE_TABLE |
- BRW_NEW_STATS_WM
- : 0) |
- (GFX_VER < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
- },
- .emit = genX(upload_wm),
-};
-
-/* ---------------------------------------------------------------------- */
-
-/* We restrict scratch buffers to the bottom 32 bits of the address space
- * by using rw_32_bo().
- *
- * General State Base Address is a bit broken. If the address + size as
- * seen by STATE_BASE_ADDRESS overflows 48 bits, the GPU appears to treat
- * all accesses to the buffer as being out of bounds and returns zero.
- */
-
-#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
- pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
- /* Wa_1606682166 */ \
- pkt.SamplerCount = \
- GFX_VER == 11 ? \
- 0 : \
- DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
- pkt.BindingTableEntryCount = \
- stage_prog_data->binding_table.size_bytes / 4; \
- pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \
- \
- if (stage_prog_data->total_scratch) { \
- pkt.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); \
- pkt.PerThreadScratchSpace = \
- ffs(stage_state->per_thread_scratch) - 11; \
- } \
- \
- pkt.DispatchGRFStartRegisterForURBData = \
- stage_prog_data->dispatch_grf_start_reg; \
- pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
- pkt.prefix##URBEntryReadOffset = 0; \
- \
- pkt.StatisticsEnable = true; \
- pkt.Enable = true;
-
-static void
-genX(upload_vs_state)(struct brw_context *brw)
-{
- UNUSED struct gl_context *ctx = &brw->ctx;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_stage_state *stage_state = &brw->vs.base;
-
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_vue_prog_data *vue_prog_data =
- brw_vue_prog_data(brw->vs.base.prog_data);
- const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base;
-
- assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
- vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
- assert(GFX_VER < 11 ||
- vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
-
-#if GFX_VER == 6
- /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
- * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
- *
- * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
- * command that causes the VS Function Enable to toggle. Pipeline
- * flush can be executed by sending a PIPE_CONTROL command with CS
- * stall bit set and a post sync operation.
- *
- * We've already done such a flush at the start of state upload, so we
- * don't need to do another one here.
- */
- brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) {
- if (stage_state->push_const_size != 0) {
- cvs.Buffer0Valid = true;
- cvs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
- cvs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
- }
- }
-#endif
-
- if (GFX_VER == 7 && devinfo->platform == INTEL_PLATFORM_IVB)
- gfx7_emit_vs_workaround_flush(brw);
-
-#if GFX_VER >= 6
- brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
-#else
- ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
- brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
-#endif
- INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
-
- vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
-
-#if GFX_VER < 6
- vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
- vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
- vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;
-
- vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GFX_VER == 5 ? 2 : 0);
- vs.URBEntryAllocationSize = brw->urb.vsize - 1;
-
- vs.MaximumNumberofThreads =
- CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;
-
- vs.StatisticsEnable = false;
- vs.SamplerStatePointer =
- ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
-#endif
-
-#if GFX_VER == 5
- /* Force single program flow on Ironlake. We cannot reliably get
- * all applications working without it. See:
- * https://bugs.freedesktop.org/show_bug.cgi?id=29172
- *
- * The most notable and reliably failing application is the Humus
- * demo "CelShading"
- */
- vs.SingleProgramFlow = true;
- vs.SamplerCount = 0; /* hardware requirement */
-#endif
-
-#if GFX_VER >= 8
- vs.SIMD8DispatchEnable =
- vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
-
- vs.UserClipDistanceCullTestEnableBitmask =
- vue_prog_data->cull_distance_mask;
-#endif
- }
-
-#if GFX_VER == 6
- /* Based on my reading of the simulator, the VS constants don't get
- * pulled into the VS FF unit until an appropriate pipeline flush
- * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
- * references to them into a little FIFO. The flushes are common,
- * but don't reliably happen between this and a 3DPRIMITIVE, causing
- * the primitive to use the wrong constants. Then the FIFO
- * containing the constant setup gets added to again on the next
- * constants change, and eventually when a flush does happen the
- * unit is overwhelmed by constant changes and dies.
- *
- * To avoid this, send a PIPE_CONTROL down the line that will
- * update the unit immediately loading the constants. The flush
- * type bits here were those set by the STATE_BASE_ADDRESS whose
- * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
- * bug reports that led to this workaround, and may be more than
- * what is strictly required to avoid the issue.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_STATE_CACHE_INVALIDATE);
-#endif
-}
-
-static const struct brw_tracked_state genX(vs_state) = {
- .dirty = {
- .mesa = (GFX_VER == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0),
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_VS_PROG_DATA |
- (GFX_VER == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
- (GFX_VER <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_PROGRAM_CACHE |
- BRW_NEW_SAMPLER_STATE_TABLE |
- BRW_NEW_URB_FENCE
- : 0),
- },
- .emit = genX(upload_vs_state),
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_cc_viewport)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* BRW_NEW_VIEWPORT_COUNT */
- const unsigned viewport_count = brw->clip.viewport_count;
-
- struct GENX(CC_VIEWPORT) ccv;
- uint32_t cc_vp_offset;
- uint32_t *cc_map =
- brw_state_batch(brw, 4 * GENX(CC_VIEWPORT_length) * viewport_count,
- 32, &cc_vp_offset);
-
- for (unsigned i = 0; i < viewport_count; i++) {
- /* _NEW_VIEWPORT | _NEW_TRANSFORM */
- const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i];
- if (ctx->Transform.DepthClampNear && ctx->Transform.DepthClampFar) {
- ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
- ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
- } else if (ctx->Transform.DepthClampNear) {
- ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
- ccv.MaximumDepth = 0.0;
- } else if (ctx->Transform.DepthClampFar) {
- ccv.MinimumDepth = 0.0;
- ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
- } else {
- ccv.MinimumDepth = 0.0;
- ccv.MaximumDepth = 1.0;
- }
- GENX(CC_VIEWPORT_pack)(NULL, cc_map, &ccv);
- cc_map += GENX(CC_VIEWPORT_length);
- }
-
-#if GFX_VER >= 7
- brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
- ptr.CCViewportPointer = cc_vp_offset;
- }
-#elif GFX_VER == 6
- brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
- vp.CCViewportStateChange = 1;
- vp.PointertoCC_VIEWPORT = cc_vp_offset;
- }
-#else
- brw->cc.vp_offset = cc_vp_offset;
- ctx->NewDriverState |= BRW_NEW_CC_VP;
-#endif
-}
-
-const struct brw_tracked_state genX(cc_vp) = {
- .dirty = {
- .mesa = _NEW_TRANSFORM |
- _NEW_VIEWPORT,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VIEWPORT_COUNT,
- },
- .emit = genX(upload_cc_viewport)
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-set_scissor_bits(const struct gl_context *ctx, int i,
- bool flip_y, unsigned fb_width, unsigned fb_height,
- struct GENX(SCISSOR_RECT) *sc)
-{
- int bbox[4];
-
- bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
- bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
- bbox[2] = CLAMP(ctx->ViewportArray[i].Y, 0, fb_height);
- bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
- _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
-
- if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
- /* If the scissor was out of bounds and got clamped to 0 width/height
- * at the bounds, the subtraction of 1 from maximums could produce a
- * negative number and thus not clip anything. Instead, just provide
- * a min > max scissor inside the bounds, which produces no rendering,
- * as expected.
- */
- sc->ScissorRectangleXMin = 1;
- sc->ScissorRectangleXMax = 0;
- sc->ScissorRectangleYMin = 1;
- sc->ScissorRectangleYMax = 0;
- } else if (!flip_y) {
- /* texmemory: Y=0=bottom */
- sc->ScissorRectangleXMin = bbox[0];
- sc->ScissorRectangleXMax = bbox[1] - 1;
- sc->ScissorRectangleYMin = bbox[2];
- sc->ScissorRectangleYMax = bbox[3] - 1;
- } else {
- /* memory: Y=0=top */
- sc->ScissorRectangleXMin = bbox[0];
- sc->ScissorRectangleXMax = bbox[1] - 1;
- sc->ScissorRectangleYMin = fb_height - bbox[3];
- sc->ScissorRectangleYMax = fb_height - bbox[2] - 1;
- }
-}
-
-#if GFX_VER >= 6
-static void
-genX(upload_scissor_state)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- const bool flip_y = ctx->DrawBuffer->FlipY;
- struct GENX(SCISSOR_RECT) scissor;
- uint32_t scissor_state_offset;
- const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
- const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
- uint32_t *scissor_map;
-
- /* BRW_NEW_VIEWPORT_COUNT */
- const unsigned viewport_count = brw->clip.viewport_count;
- /* Wa_1409725701:
- * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
- * stored as an array of up to 16 elements. The location of first
- * element of the array, as specified by Pointer to SCISSOR_RECT, should
- * be aligned to a 64-byte boundary."
- */
- const unsigned alignment = 64;
- scissor_map = brw_state_batch(
- brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count,
- alignment, &scissor_state_offset);
-
- /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
-
- /* The scissor only needs to handle the intersection of drawable and
- * scissor rect. Clipping to the boundaries of static shared buffers
- * for front/back/depth is covered by looping over cliprects in brw_draw.c.
- *
- * Note that the hardware's coordinates are inclusive, while Mesa's min is
- * inclusive but max is exclusive.
- */
- for (unsigned i = 0; i < viewport_count; i++) {
- set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, &scissor);
- GENX(SCISSOR_RECT_pack)(
- NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor);
- }
-
- brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
- ptr.ScissorRectPointer = scissor_state_offset;
- }
-}
-
-static const struct brw_tracked_state genX(scissor_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_SCISSOR |
- _NEW_VIEWPORT,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VIEWPORT_COUNT,
- },
- .emit = genX(upload_scissor_state),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_sf_clip_viewport)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- float y_scale, y_bias;
-
- /* BRW_NEW_VIEWPORT_COUNT */
- const unsigned viewport_count = brw->clip.viewport_count;
-
- /* _NEW_BUFFERS */
- const bool flip_y = ctx->DrawBuffer->FlipY;
- const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer);
- const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer);
-
-#if GFX_VER >= 7
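- /* On Gfx7+ the SF and CLIP viewports are combined into a single
- * SF_CLIP_VIEWPORT structure, so alias the clip-viewport fields onto
- * the same struct as the SF ones below.
- */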
-#define clv sfv
- struct GENX(SF_CLIP_VIEWPORT) sfv;
- uint32_t sf_clip_vp_offset;
- uint32_t *sf_clip_map =
- brw_state_batch(brw, GENX(SF_CLIP_VIEWPORT_length) * 4 * viewport_count,
- 64, &sf_clip_vp_offset);
-#else
- struct GENX(SF_VIEWPORT) sfv;
- struct GENX(CLIP_VIEWPORT) clv;
- uint32_t sf_vp_offset, clip_vp_offset;
- uint32_t *sf_map =
- brw_state_batch(brw, GENX(SF_VIEWPORT_length) * 4 * viewport_count,
- 32, &sf_vp_offset);
- uint32_t *clip_map =
- brw_state_batch(brw, GENX(CLIP_VIEWPORT_length) * 4 * viewport_count,
- 32, &clip_vp_offset);
-#endif
-
- /* _NEW_BUFFERS */
- if (flip_y) {
- y_scale = -1.0;
- y_bias = (float)fb_height;
- } else {
- y_scale = 1.0;
- y_bias = 0;
- }
-
- for (unsigned i = 0; i < brw->clip.viewport_count; i++) {
- /* _NEW_VIEWPORT: Guardband Clipping */
- float scale[3], translate[3], gb_xmin, gb_xmax, gb_ymin, gb_ymax;
- _mesa_get_viewport_xform(ctx, i, scale, translate);
-
- sfv.ViewportMatrixElementm00 = scale[0];
- sfv.ViewportMatrixElementm11 = scale[1] * y_scale;
- sfv.ViewportMatrixElementm22 = scale[2];
- sfv.ViewportMatrixElementm30 = translate[0];
- sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias;
- sfv.ViewportMatrixElementm32 = translate[2];
- intel_calculate_guardband_size(fb_width, fb_height,
- sfv.ViewportMatrixElementm00,
- sfv.ViewportMatrixElementm11,
- sfv.ViewportMatrixElementm30,
- sfv.ViewportMatrixElementm31,
- &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax);
-
- clv.XMinClipGuardband = gb_xmin;
- clv.XMaxClipGuardband = gb_xmax;
- clv.YMinClipGuardband = gb_ymin;
- clv.YMaxClipGuardband = gb_ymax;
-
-#if GFX_VER < 6
- set_scissor_bits(ctx, i, flip_y, fb_width, fb_height,
- &sfv.ScissorRectangle);
-#elif GFX_VER >= 8
- /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport
- * The hardware will take the intersection of the drawing rectangle,
- * scissor rectangle, and the viewport extents. However, emitting
- * 3DSTATE_DRAWING_RECTANGLE is expensive since it requires a full
- * pipeline stall, so we're better off being a little more clever with
- * our viewport and emitting it once at context creation time.
- */
- const float viewport_Xmin = MAX2(ctx->ViewportArray[i].X, 0);
- const float viewport_Ymin = MAX2(ctx->ViewportArray[i].Y, 0);
- const float viewport_Xmax =
- MIN2(ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width, fb_width);
- const float viewport_Ymax =
- MIN2(ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height, fb_height);
-
- if (flip_y) {
- sfv.XMinViewPort = viewport_Xmin;
- sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = fb_height - viewport_Ymax;
- sfv.YMaxViewPort = fb_height - viewport_Ymin - 1;
- } else {
- sfv.XMinViewPort = viewport_Xmin;
- sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = viewport_Ymin;
- sfv.YMaxViewPort = viewport_Ymax - 1;
- }
-#endif
-
-#if GFX_VER >= 7
- GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv);
- sf_clip_map += GENX(SF_CLIP_VIEWPORT_length);
-#else
- GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv);
- GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv);
- sf_map += GENX(SF_VIEWPORT_length);
- clip_map += GENX(CLIP_VIEWPORT_length);
-#endif
- }
-
-#if GFX_VER >= 7
- brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
- ptr.SFClipViewportPointer = sf_clip_vp_offset;
- }
-#elif GFX_VER == 6
- brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
- vp.SFViewportStateChange = 1;
- vp.CLIPViewportStateChange = 1;
- vp.PointertoCLIP_VIEWPORT = clip_vp_offset;
- vp.PointertoSF_VIEWPORT = sf_vp_offset;
- }
-#else
- brw->sf.vp_offset = sf_vp_offset;
- brw->clip.vp_offset = clip_vp_offset;
- brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP;
-#endif
-}
-
-static const struct brw_tracked_state genX(sf_clip_viewport) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_VIEWPORT |
- (GFX_VER <= 5 ? _NEW_SCISSOR : 0),
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VIEWPORT_COUNT,
- },
- .emit = genX(upload_sf_clip_viewport),
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_gs_state)(struct brw_context *brw)
-{
- UNUSED struct gl_context *ctx = &brw->ctx;
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct brw_stage_state *stage_state = &brw->gs.base;
- const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY];
- /* BRW_NEW_GEOMETRY_PROGRAM */
- bool active = GFX_VER >= 6 && gs_prog;
-
- /* BRW_NEW_GS_PROG_DATA */
- struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
- UNUSED const struct brw_vue_prog_data *vue_prog_data =
- brw_vue_prog_data(stage_prog_data);
-#if GFX_VER >= 7
- const struct brw_gs_prog_data *gs_prog_data =
- brw_gs_prog_data(stage_prog_data);
-#endif
-
-#if GFX_VER == 6
- brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
- if (active && stage_state->push_const_size != 0) {
- cgs.Buffer0Valid = true;
- cgs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
- cgs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
- }
- }
-#endif
-
-#if GFX_VERx10 == 70
- /**
- * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
- * Geometry > Geometry Shader > State:
- *
- * "Note: Because of corruption in IVB:GT2, software needs to flush the
- * whole fixed function pipeline when the GS enable changes value in
- * the 3DSTATE_GS."
- *
- * The hardware architects have clarified that in this context "flush the
- * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
- * Stall" bit set.
- */
- if (devinfo->gt == 2 && brw->gs.enabled != active)
- gfx7_emit_cs_stall_flush(brw);
-#endif
-
-#if GFX_VER >= 6
- brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
-#else
- ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
- brw_state_emit(brw, GENX(GS_STATE), 32, &brw->ff_gs.state_offset, gs) {
-#endif
-
-#if GFX_VER >= 6
- if (active) {
- INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
-
-#if GFX_VER >= 7
- gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
- gs.OutputTopology = gs_prog_data->output_topology;
- gs.ControlDataHeaderSize =
- gs_prog_data->control_data_header_size_hwords;
-
- gs.InstanceControl = gs_prog_data->invocations - 1;
- gs.DispatchMode = vue_prog_data->dispatch_mode;
-
- gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
-
- gs.ControlDataFormat = gs_prog_data->control_data_format;
-#endif
-
- /* Note: the meaning of the GFX7_GS_REORDER_TRAILING bit changes between
- * Ivy Bridge and Haswell.
- *
- * On Ivy Bridge, setting this bit causes the vertices of a triangle
- * strip to be delivered to the geometry shader in an order that does
- * not strictly follow the OpenGL spec, but preserves triangle
- * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then
- * the geometry shader sees triangles:
- *
- * (1, 2, 3), (2, 4, 3), (3, 4, 5)
- *
- * (Clearing the bit is even worse, because it fails to preserve
- * orientation).
- *
- * Triangle strips with adjacency are always ordered in a way that preserves
- * triangle orientation but does not strictly follow the OpenGL spec,
- * regardless of the setting of this bit.
- *
- * On Haswell, both triangle strips and triangle strips with adjacency
- * are always ordered in a way that preserves triangle orientation.
- * Setting this bit causes the ordering to strictly follow the OpenGL
- * spec.
- *
- * So in either case we want to set the bit. Unfortunately on Ivy
- * Bridge this will get the order close to correct but not perfect.
- */
- gs.ReorderMode = TRAILING;
- gs.MaximumNumberofThreads =
- GFX_VER == 8 ? (devinfo->max_gs_threads / 2 - 1)
- : (devinfo->max_gs_threads - 1);
-
-#if GFX_VER < 7
- gs.SOStatisticsEnable = true;
- if (gs_prog->info.has_transform_feedback_varyings)
- gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx);
-
- /* GFX6_GS_SPF_MODE and GFX6_GS_VECTOR_MASK_ENABLE are enabled, as was
- * previously done for gfx6.
- *
- * TODO: test with both disabled to see if the HW is behaving
- * as expected, like in gfx7.
- */
- gs.SingleProgramFlow = true;
- gs.VectorMaskEnable = true;
-#endif
-
-#if GFX_VER >= 8
- gs.ExpectedVertexCount = gs_prog_data->vertices_in;
-
- if (gs_prog_data->static_vertex_count != -1) {
- gs.StaticOutput = true;
- gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
- }
- gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
-
- gs.UserClipDistanceCullTestEnableBitmask =
- vue_prog_data->cull_distance_mask;
-
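- /* URB entry output offset/length are in 32-byte units, i.e. pairs of
- * 16-byte VUE slots; the first pair holds the VUE header, hence the
- * write offset of one.
- */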
- const int urb_entry_write_offset = 1;
- const uint32_t urb_entry_output_length =
- DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
- urb_entry_write_offset;
-
- gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
- gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
-#endif
- }
-#endif
-
-#if GFX_VER <= 6
- if (!active && brw->ff_gs.prog_active) {
- /* In gfx6, transform feedback for the VS stage is done with an
- * ad-hoc GS program. This function provides the needed 3DSTATE_GS
- * for this.
- */
- gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset);
- gs.SingleProgramFlow = true;
- gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 2 : 1;
- gs.VertexURBEntryReadLength = brw->ff_gs.prog_data->urb_read_length;
-
-#if GFX_VER <= 5
- gs.GRFRegisterCount =
- DIV_ROUND_UP(brw->ff_gs.prog_data->total_grf, 16) - 1;
- /* BRW_NEW_URB_FENCE */
- gs.NumberofURBEntries = brw->urb.nr_gs_entries;
- gs.URBEntryAllocationSize = brw->urb.vsize - 1;
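- /* This field is "thread count minus one": run two GS threads when at
- * least eight URB entries are available, otherwise just one.
- */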
- gs.MaximumNumberofThreads = brw->urb.nr_gs_entries >= 8 ? 1 : 0;
- gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
-#else
- gs.Enable = true;
- gs.VectorMaskEnable = true;
- gs.SVBIPayloadEnable = true;
- gs.SVBIPostIncrementEnable = true;
- gs.SVBIPostIncrementValue =
- brw->ff_gs.prog_data->svbi_postincrement_value;
- gs.SOStatisticsEnable = true;
- gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
-#endif
- }
-#endif
- if (!active && !brw->ff_gs.prog_active) {
-#if GFX_VER < 8
- gs.DispatchGRFStartRegisterForURBData = 1;
-#if GFX_VER >= 7
- gs.IncludeVertexHandles = true;
-#endif
-#endif
- }
-
-#if GFX_VER >= 6
- gs.StatisticsEnable = true;
-#endif
-#if GFX_VER == 5 || GFX_VER == 6
- gs.RenderingEnabled = true;
-#endif
-#if GFX_VER <= 5
- gs.MaximumVPIndex = brw->clip.viewport_count - 1;
-#endif
- }
-
-#if GFX_VER == 6
- brw->gs.enabled = active;
-#endif
-}
-
-static const struct brw_tracked_state genX(gs_state) = {
- .dirty = {
- .mesa = (GFX_VER == 6 ? _NEW_PROGRAM_CONSTANTS : 0),
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- (GFX_VER <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_PROGRAM_CACHE |
- BRW_NEW_URB_FENCE |
- BRW_NEW_VIEWPORT_COUNT
- : 0) |
- (GFX_VER >= 6 ? BRW_NEW_CONTEXT |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA
- : 0) |
- (GFX_VER < 7 ? BRW_NEW_FF_GS_PROG_DATA : 0),
- },
- .emit = genX(upload_gs_state),
-};
-
-/* ---------------------------------------------------------------------- */
-
-UNUSED static GLenum
-fix_dual_blend_alpha_to_one(GLenum function)
-{
- switch (function) {
- case GL_SRC1_ALPHA:
- return GL_ONE;
-
- case GL_ONE_MINUS_SRC1_ALPHA:
- return GL_ZERO;
- }
-
- return function;
-}
-
-#define blend_factor(x) brw_translate_blend_factor(x)
-#define blend_eqn(x) brw_translate_blend_equation(x)
-
-/**
- * Modify blend function to force destination alpha to 1.0
- *
- * If \c function specifies a blend function that uses destination alpha,
- * replace it with a function that hard-wires destination alpha to 1.0. This
- * is used when rendering to xRGB targets.
- */
-static GLenum
-brw_fix_xRGB_alpha(GLenum function)
-{
- switch (function) {
- case GL_DST_ALPHA:
- return GL_ONE;
-
- case GL_ONE_MINUS_DST_ALPHA:
- case GL_SRC_ALPHA_SATURATE:
- return GL_ZERO;
- }
-
- return function;
-}
-
-#if GFX_VER >= 6
-typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;
-#else
-typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;
-#endif
-
-UNUSED static bool
-set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i,
- bool alpha_to_one)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_BUFFERS */
- const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
-
- bool independent_alpha_blend = false;
-
- /* Used for implementing the following bit of GL_EXT_texture_integer:
- * "Per-fragment operations that require floating-point color
- * components, including multisample alpha operations, alpha test,
- * blending, and dithering, have no effect when the corresponding
- * colors are written to an integer color buffer."
- */
- const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);
-
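- /* Before Gfx6 there is only a single blend enable covering all render
- * targets, hence no per-RT bit test in that case.
- */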
- const unsigned blend_enabled = GFX_VER >= 6 ?
- ctx->Color.BlendEnabled & (1 << i) : ctx->Color.BlendEnabled;
-
- /* _NEW_COLOR */
- if (ctx->Color.ColorLogicOpEnabled) {
- GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
- : GL_UNSIGNED_NORMALIZED;
- WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
- rb_type != GL_UNSIGNED_NORMALIZED &&
- rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
- "renderbuffer\n",
- _mesa_enum_to_string(ctx->Color.LogicOp),
- _mesa_enum_to_string(rb_type));
- if (GFX_VER >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
- entry->LogicOpEnable = true;
- entry->LogicOpFunction = ctx->Color._LogicOp;
- }
- } else if (blend_enabled &&
- ctx->Color._AdvancedBlendMode == BLEND_NONE
- && (GFX_VER <= 5 || !integer)) {
- GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
- GLenum eqA = ctx->Color.Blend[i].EquationA;
- GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
- GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
- GLenum srcA = ctx->Color.Blend[i].SrcA;
- GLenum dstA = ctx->Color.Blend[i].DstA;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX)
- srcRGB = dstRGB = GL_ONE;
-
- if (eqA == GL_MIN || eqA == GL_MAX)
- srcA = dstA = GL_ONE;
-
- /* Due to hardware limitations, the destination may have information
- * in an alpha channel even when the format specifies no alpha
- * channel. In order to avoid getting any incorrect blending due to
- * that alpha channel, coerce the blend factors to values that will
- * not read the alpha channel, but will instead use the correct
- * implicit value for alpha.
- */
- if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
- GL_TEXTURE_ALPHA_TYPE)) {
- srcRGB = brw_fix_xRGB_alpha(srcRGB);
- srcA = brw_fix_xRGB_alpha(srcA);
- dstRGB = brw_fix_xRGB_alpha(dstRGB);
- dstA = brw_fix_xRGB_alpha(dstA);
- }
-
- /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
- * "If Dual Source Blending is enabled, this bit must be disabled."
- *
- * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
- * and leave it enabled anyway.
- */
- if (GFX_VER >= 6 && ctx->Color._BlendUsesDualSrc & (1 << i) && alpha_to_one) {
- srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
- srcA = fix_dual_blend_alpha_to_one(srcA);
- dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
- dstA = fix_dual_blend_alpha_to_one(dstA);
- }
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- /* The Dual Source Blending documentation says:
- *
- * "If SRC1 is included in a src/dst blend factor and
- * a DualSource RT Write message is not used, results
- * are UNDEFINED. (This reflects the same restriction in DX APIs,
- * where undefined results are produced if “o1” is not written
- * by a PS – there are no default values defined).
- * If SRC1 is not included in a src/dst blend factor,
- * dual source blending must be disabled."
- *
- * There is no way to gracefully fix this undefined situation
- * so we just disable the blending to prevent possible issues.
- */
- entry->ColorBufferBlendEnable =
- !(ctx->Color._BlendUsesDualSrc & 0x1) || wm_prog_data->dual_src_blend;
-
- entry->DestinationBlendFactor = blend_factor(dstRGB);
- entry->SourceBlendFactor = blend_factor(srcRGB);
- entry->DestinationAlphaBlendFactor = blend_factor(dstA);
- entry->SourceAlphaBlendFactor = blend_factor(srcA);
- entry->ColorBlendFunction = blend_eqn(eqRGB);
- entry->AlphaBlendFunction = blend_eqn(eqA);
-
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
- independent_alpha_blend = true;
- }
-
- return independent_alpha_blend;
-}
-
-#if GFX_VER >= 6
-static void
-genX(upload_blend_state)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- int size;
-
- /* We need at least one BLEND_STATE written, because we might do
- * thread dispatch even if _NumColorDrawBuffers is 0 (for example
- * for computed depth or alpha test), which will do an FB write
- * with render target 0, which will reference BLEND_STATE[0] for
- * alpha test enable.
- */
- int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
- if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
- nr_draw_buffers = 1;
-
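- /* The genxml *_length values count dwords, hence the "* 4" below to
- * get a size in bytes for brw_state_batch().
- */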
- size = GENX(BLEND_STATE_ENTRY_length) * 4 * nr_draw_buffers;
-#if GFX_VER >= 8
- size += GENX(BLEND_STATE_length) * 4;
-#endif
-
- uint32_t *blend_map;
- blend_map = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset);
-
-#if GFX_VER >= 8
- struct GENX(BLEND_STATE) blend = { 0 };
- {
-#else
- for (int i = 0; i < nr_draw_buffers; i++) {
- struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
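- /* Before Gfx8 there is no separate BLEND_STATE container; the "global"
- * alpha/dither bits live in each BLEND_STATE_ENTRY, so alias the entry
- * as "blend" to share the code below.
- */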
-#define blend entry
-#endif
- /* OpenGL specification 3.3 (page 196), section 4.1.3 says:
- * "If drawbuffer zero is not NONE and the buffer it references has an
- * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
- * operations are skipped."
- */
- if (!(ctx->DrawBuffer->_IntegerBuffers & 0x1)) {
- /* _NEW_MULTISAMPLE */
- if (_mesa_is_multisample_enabled(ctx)) {
- if (ctx->Multisample.SampleAlphaToCoverage) {
- blend.AlphaToCoverageEnable = true;
- blend.AlphaToCoverageDitherEnable = GFX_VER >= 7;
- }
- if (ctx->Multisample.SampleAlphaToOne)
- blend.AlphaToOneEnable = true;
- }
-
- /* _NEW_COLOR */
- if (ctx->Color.AlphaEnabled) {
- blend.AlphaTestEnable = true;
- blend.AlphaTestFunction =
- brw_translate_compare_func(ctx->Color.AlphaFunc);
- }
-
- if (ctx->Color.DitherFlag) {
- blend.ColorDitherEnable = true;
- }
- }
-
-#if GFX_VER >= 8
- for (int i = 0; i < nr_draw_buffers; i++) {
- struct GENX(BLEND_STATE_ENTRY) entry = { 0 };
-#else
- {
-#endif
- blend.IndependentAlphaBlendEnable =
- set_blend_entry_bits(brw, &entry, i, blend.AlphaToOneEnable) ||
- blend.IndependentAlphaBlendEnable;
-
- /* See section 8.1.6 "Pre-Blend Color Clamping" of the
- * SandyBridge PRM Volume 2 Part 1 for HW requirements.
- *
- * We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR
- * clamping in the fragment shader. For its clamping of
- * blending, the spec says:
- *
- * "RESOLVED: For fixed-point color buffers, the inputs and
- * the result of the blending equation are clamped. For
- * floating-point color buffers, no clamping occurs."
- *
- * So, generally, we want clamping to the render target's range.
- * And, good news, the hardware tables for both pre- and
- * post-blend color clamping are either ignored, or any are
- * allowed, or clamping is required but RT range clamping is a
- * valid option.
- */
- entry.PreBlendColorClampEnable = true;
- entry.PostBlendColorClampEnable = true;
- entry.ColorClampRange = COLORCLAMP_RTFORMAT;
-
- entry.WriteDisableRed = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 0);
- entry.WriteDisableGreen = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 1);
- entry.WriteDisableBlue = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 2);
- entry.WriteDisableAlpha = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 3);
-
-#if GFX_VER >= 8
- GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
-#else
- GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry);
-#endif
- }
- }
-
-#if GFX_VER >= 8
- GENX(BLEND_STATE_pack)(NULL, blend_map, &blend);
-#endif
-
-#if GFX_VER < 7
- brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
- ptr.PointertoBLEND_STATE = brw->cc.blend_state_offset;
- ptr.BLEND_STATEChange = true;
- }
-#else
- brw_batch_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
- ptr.BlendStatePointer = brw->cc.blend_state_offset;
-#if GFX_VER >= 8
- ptr.BlendStatePointerValid = true;
-#endif
- }
-#endif
-}
-
-UNUSED static const struct brw_tracked_state genX(blend_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_COLOR |
- _NEW_MULTISAMPLE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_STATE_BASE_ADDRESS,
- },
- .emit = genX(upload_blend_state),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
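- /* 3D command sub-opcodes for 3DSTATE_CONSTANT_{VS,HS,DS,GS,PS}, indexed
- * by shader stage. The loop below emits the VS packet layout and patches
- * in the per-stage sub-opcode so one code path serves every stage.
- */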
-UNUSED static const uint32_t push_constant_opcodes[] = {
- [MESA_SHADER_VERTEX] = 21,
- [MESA_SHADER_TESS_CTRL] = 25, /* HS */
- [MESA_SHADER_TESS_EVAL] = 26, /* DS */
- [MESA_SHADER_GEOMETRY] = 22,
- [MESA_SHADER_FRAGMENT] = 23,
- [MESA_SHADER_COMPUTE] = 0,
-};
-
-static void
-genX(upload_push_constant_packets)(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
-
- UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
-
- struct brw_stage_state *stage_states[] = {
- &brw->vs.base,
- &brw->tcs.base,
- &brw->tes.base,
- &brw->gs.base,
- &brw->wm.base,
- };
-
-
- if (GFX_VERx10 == 70 &&
- devinfo->platform == INTEL_PLATFORM_IVB &&
- stage_states[MESA_SHADER_VERTEX]->push_constants_dirty)
- gfx7_emit_vs_workaround_flush(brw);
-
- for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
- struct brw_stage_state *stage_state = stage_states[stage];
- UNUSED struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
-
- if (!stage_state->push_constants_dirty)
- continue;
-
- brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
- pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
-#if GFX_VER >= 9
- pkt.MOCS = mocs;
-#elif GFX_VER < 8
- /* MOCS is MBZ on Gfx8 so we skip it there */
- pkt.ConstantBody.MOCS = mocs;
-#endif
- if (stage_state->prog_data) {
-#if GFX_VERx10 >= 75
- /* The Skylake PRM contains the following restriction:
- *
- * "The driver must ensure The following case does not occur
- * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
- * buffer 3 read length equal to zero committed followed by a
- * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
- * zero committed."
- *
- * To avoid this, we program the buffers in the highest slots.
- * This way, slot 0 is only used if slot 3 is also used.
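-             * For example, two non-empty UBO ranges occupy slots 3 and 2,
-             * and the ordinary push constants (if any) then use slot 1.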
- */
- int n = 3;
-
- for (int i = 3; i >= 0; i--) {
- const struct brw_ubo_range *range =
- &stage_state->prog_data->ubo_ranges[i];
-
- if (range->length == 0)
- continue;
-
- const struct gl_uniform_block *block =
- prog->sh.UniformBlocks[range->block];
- const struct gl_buffer_binding *binding =
- &ctx->UniformBufferBindings[block->Binding];
-
- if (!binding->BufferObject) {
- static unsigned msg_id = 0;
- _mesa_gl_debugf(ctx, &msg_id, MESA_DEBUG_SOURCE_API,
- MESA_DEBUG_TYPE_UNDEFINED,
- MESA_DEBUG_SEVERITY_HIGH,
- "UBO %d unbound, %s shader uniform data "
- "will be undefined.",
- range->block,
- _mesa_shader_stage_to_string(stage));
- continue;
- }
-
- assert(binding->Offset % 32 == 0);
-
- struct brw_bo *bo = brw_bufferobj_buffer(brw,
- brw_buffer_object(binding->BufferObject),
- binding->Offset, range->length * 32, false);
-
- pkt.ConstantBody.ReadLength[n] = range->length;
- pkt.ConstantBody.Buffer[n] =
- ro_bo(bo, range->start * 32 + binding->Offset);
- n--;
- }
-
- if (stage_state->push_const_size > 0) {
- assert(n >= 0);
- pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size;
- pkt.ConstantBody.Buffer[n] =
- ro_bo(stage_state->push_const_bo,
- stage_state->push_const_offset);
- }
-#else
- pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
- pkt.ConstantBody.Buffer[0].offset = stage_state->push_const_offset;
-#endif
- }
- }
-
- stage_state->push_constants_dirty = false;
- brw->ctx.NewDriverState |= GFX_VER >= 9 ? BRW_NEW_SURFACES : 0;
- }
-}
-
-const struct brw_tracked_state genX(push_constant_packets) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_DRAW_CALL,
- },
- .emit = genX(upload_push_constant_packets),
-};
-#endif
-
-#if GFX_VER >= 6
-static void
-genX(upload_vs_push_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->vs.base;
-
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
-
- gfx6_upload_push_constants(brw, vp, prog_data, stage_state);
-}
-
-static const struct brw_tracked_state genX(vs_push_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS |
- _NEW_TRANSFORM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = genX(upload_vs_push_constants),
-};
-
-static void
-genX(upload_gs_push_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->gs.base;
-
- /* BRW_NEW_GEOMETRY_PROGRAM */
- const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
-
- /* BRW_NEW_GS_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
- gfx6_upload_push_constants(brw, gp, prog_data, stage_state);
-}
-
-static const struct brw_tracked_state genX(gs_push_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS |
- _NEW_TRANSFORM,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA,
- },
- .emit = genX(upload_gs_push_constants),
-};
-
-static void
-genX(upload_wm_push_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->wm.base;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
-
- gfx6_upload_push_constants(brw, fp, prog_data, stage_state);
-}
-
-static const struct brw_tracked_state genX(wm_push_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA,
- },
- .emit = genX(upload_wm_push_constants),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 6
-static unsigned
-genX(determine_sample_mask)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- float coverage = 1.0f;
-   bool coverage_invert = false;
- unsigned sample_mask = ~0u;
-
- /* BRW_NEW_NUM_SAMPLES */
- unsigned num_samples = brw->num_samples;
-
- if (_mesa_is_multisample_enabled(ctx)) {
- if (ctx->Multisample.SampleCoverage) {
- coverage = ctx->Multisample.SampleCoverageValue;
- coverage_invert = ctx->Multisample.SampleCoverageInvert;
- }
- if (ctx->Multisample.SampleMask) {
- sample_mask = ctx->Multisample.SampleMaskValue;
- }
- }
-
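-   /* For example, with num_samples = 4 and coverage = 0.5, coverage_int is
-    * 2 and coverage_bits is 0b0011; inverted coverage flips it to 0b1100
-    * before it is ANDed with the application-provided sample mask.
-    */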
- if (num_samples > 1) {
- int coverage_int = (int) (num_samples * coverage + 0.5f);
- uint32_t coverage_bits = (1 << coverage_int) - 1;
- if (coverage_invert)
- coverage_bits ^= (1 << num_samples) - 1;
- return coverage_bits & sample_mask;
- } else {
- return 1;
- }
-}
-
-static void
-genX(emit_3dstate_multisample2)(struct brw_context *brw,
- unsigned num_samples)
-{
- unsigned log2_samples = ffs(num_samples) - 1;
-
- brw_batch_emit(brw, GENX(3DSTATE_MULTISAMPLE), multi) {
- multi.PixelLocation = CENTER;
- multi.NumberofMultisamples = log2_samples;
-#if GFX_VER == 6
- INTEL_SAMPLE_POS_4X(multi.Sample);
-#elif GFX_VER == 7
- switch (num_samples) {
- case 1:
- INTEL_SAMPLE_POS_1X(multi.Sample);
- break;
- case 2:
- INTEL_SAMPLE_POS_2X(multi.Sample);
- break;
- case 4:
- INTEL_SAMPLE_POS_4X(multi.Sample);
- break;
- case 8:
- INTEL_SAMPLE_POS_8X(multi.Sample);
- break;
- default:
- break;
- }
-#endif
- }
-}
-
-static void
-genX(upload_multisample_state)(struct brw_context *brw)
-{
- assert(brw->num_samples > 0 && brw->num_samples <= 16);
-
- genX(emit_3dstate_multisample2)(brw, brw->num_samples);
-
- brw_batch_emit(brw, GENX(3DSTATE_SAMPLE_MASK), sm) {
- sm.SampleMask = genX(determine_sample_mask)(brw);
- }
-}
-
-static const struct brw_tracked_state genX(multisample_state) = {
- .dirty = {
- .mesa = _NEW_MULTISAMPLE |
- (GFX_VER == 10 ? _NEW_BUFFERS : 0),
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_NUM_SAMPLES,
- },
- .emit = genX(upload_multisample_state)
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_color_calc_state)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) {
-#if GFX_VER <= 5
- cc.IndependentAlphaBlendEnable =
- set_blend_entry_bits(brw, &cc, 0, false);
- set_depth_stencil_bits(brw, &cc);
-
- if (ctx->Color.AlphaEnabled &&
- ctx->DrawBuffer->_NumColorDrawBuffers <= 1) {
- cc.AlphaTestEnable = true;
- cc.AlphaTestFunction =
- brw_translate_compare_func(ctx->Color.AlphaFunc);
- }
-
- cc.ColorDitherEnable = ctx->Color.DitherFlag;
-
- cc.StatisticsEnable = brw->stats_wm;
-
- cc.CCViewportStatePointer =
- ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
-#else
- /* _NEW_COLOR */
- cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
- cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
- cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
- cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
-
-#if GFX_VER < 9
- /* _NEW_STENCIL */
- cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
- cc.BackfaceStencilReferenceValue =
- _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
-#endif
-
-#endif
-
- /* _NEW_COLOR */
- UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
- ctx->Color.AlphaRef);
- }
-
-#if GFX_VER >= 6
- brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
- ptr.ColorCalcStatePointer = brw->cc.state_offset;
-#if GFX_VER != 7
- ptr.ColorCalcStatePointerValid = true;
-#endif
- }
-#else
- brw->ctx.NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
-#endif
-}
-
-UNUSED static const struct brw_tracked_state genX(color_calc_state) = {
- .dirty = {
- .mesa = _NEW_COLOR |
- _NEW_STENCIL |
- (GFX_VER <= 5 ? _NEW_BUFFERS |
- _NEW_DEPTH
- : 0),
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- (GFX_VER <= 5 ? BRW_NEW_CC_VP |
- BRW_NEW_STATS_WM
- : BRW_NEW_CC_STATE |
- BRW_NEW_STATE_BASE_ADDRESS),
- },
- .emit = genX(upload_color_calc_state),
-};
-
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VERx10 == 75
-static void
-genX(upload_color_calc_and_blend_state)(struct brw_context *brw)
-{
- genX(upload_blend_state)(brw);
- genX(upload_color_calc_state)(brw);
-}
-
-/* On Haswell, when BLEND_STATE is emitted, CC_STATE should also be
- * re-emitted; this works around flickering shadows in several games.
- */
-static const struct brw_tracked_state genX(cc_and_blend_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_COLOR |
- _NEW_STENCIL |
- _NEW_MULTISAMPLE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_CC_STATE |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_STATE_BASE_ADDRESS,
- },
- .emit = genX(upload_color_calc_and_blend_state),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_sbe)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-#if GFX_VER >= 8
- struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };
-#else
-#define attr_overrides sbe.Attribute
-#endif
- uint32_t urb_entry_read_length;
- uint32_t urb_entry_read_offset;
- uint32_t point_sprite_enables;
-
- brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) {
- sbe.AttributeSwizzleEnable = true;
- sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
-
- /* _NEW_BUFFERS */
- bool flip_y = ctx->DrawBuffer->FlipY;
-
- /* _NEW_POINT
- *
- * Window coordinates in an FBO are inverted, which means point
- * sprite origin must be inverted.
- */
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y)
- sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
- else
- sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
-
- /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM,
- * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM |
- * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA |
- * BRW_NEW_VUE_MAP_GEOM_OUT
- */
- genX(calculate_attr_overrides)(brw,
- attr_overrides,
- &point_sprite_enables,
- &urb_entry_read_length,
- &urb_entry_read_offset);
-
- /* Typically, the URB entry read length and offset should be programmed
-       * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits them from the last active
- * stage which produces geometry. However, we don't know the proper
- * value until we call calculate_attr_overrides().
- *
- * To fit with our existing code, we override the inherited values and
-       * specify them here directly, as we did on previous generations.
- */
- sbe.VertexURBEntryReadLength = urb_entry_read_length;
- sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
- sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;
- sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
-
-#if GFX_VER >= 8
- sbe.ForceVertexURBEntryReadLength = true;
- sbe.ForceVertexURBEntryReadOffset = true;
-#endif
-
-#if GFX_VER >= 9
- /* prepare the active component dwords */
- for (int i = 0; i < 32; i++)
- sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
-#endif
- }
-
-#if GFX_VER >= 8
- brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) {
- for (int i = 0; i < 16; i++)
- sbes.Attribute[i] = attr_overrides[i];
- }
-#endif
-
-#undef attr_overrides
-}
-
-static const struct brw_tracked_state genX(sbe_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_LIGHT |
- _NEW_POINT |
- _NEW_POLYGON |
- _NEW_PROGRAM,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_VUE_MAP_GEOM_OUT |
- (GFX_VER == 7 ? BRW_NEW_PRIMITIVE
- : 0),
- },
- .emit = genX(upload_sbe),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-/**
- * Outputs the 3DSTATE_SO_DECL_LIST command.
- *
- * The data output is a series of 64-bit entries containing a SO_DECL per
- * stream (one 16-bit SO_DECL for each of the four vertex streams), built
- * here from the linked transform feedback info for every active stream.
- */
-static void
-genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
- const struct brw_vue_map *vue_map)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_TRANSFORM_FEEDBACK */
- struct gl_transform_feedback_object *xfb_obj =
- ctx->TransformFeedback.CurrentObject;
- const struct gl_transform_feedback_info *linked_xfb_info =
- xfb_obj->program->sh.LinkedTransformFeedback;
- struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
- int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
- int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
- int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
- int max_decls = 0;
- STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
-
- memset(so_decl, 0, sizeof(so_decl));
-
- /* Construct the list of SO_DECLs to be emitted. The formatting of the
- * command feels strange -- each dword pair contains a SO_DECL per stream.
- */
- for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
- const struct gl_transform_feedback_output *output =
- &linked_xfb_info->Outputs[i];
- const int buffer = output->OutputBuffer;
- const int varying = output->OutputRegister;
- const unsigned stream_id = output->StreamId;
- assert(stream_id < MAX_VERTEX_STREAMS);
-
- buffer_mask[stream_id] |= 1 << buffer;
-
- assert(vue_map->varying_to_slot[varying] >= 0);
-
- /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
- * array. Instead, it simply increments DstOffset for the following
- * input by the number of components that should be skipped.
- *
- * Our hardware is unusual in that it requires us to program SO_DECLs
- * for fake "hole" components, rather than simply taking the offset
- * for each real varying. Each hole can have size 1, 2, 3, or 4; we
- * program as many size = 4 holes as we can, then a final hole to
- * accommodate the final 1, 2, or 3 remaining.
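-       * For example, skipping 6 components emits one hole SO_DECL with
-       * ComponentMask 0xf (4 components) followed by one with
-       * ComponentMask 0x3 (the remaining 2).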
- */
- int skip_components = output->DstOffset - next_offset[buffer];
-
- while (skip_components > 0) {
- so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
- .HoleFlag = 1,
- .OutputBufferSlot = output->OutputBuffer,
- .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
- };
- skip_components -= 4;
- }
-
- next_offset[buffer] = output->DstOffset + output->NumComponents;
-
- so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
- .OutputBufferSlot = output->OutputBuffer,
- .RegisterIndex = vue_map->varying_to_slot[varying],
- .ComponentMask =
- ((1 << output->NumComponents) - 1) << output->ComponentOffset,
- };
-
- if (decls[stream_id] > max_decls)
- max_decls = decls[stream_id];
- }
-
- uint32_t *dw;
- dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls,
- .StreamtoBufferSelects0 = buffer_mask[0],
- .StreamtoBufferSelects1 = buffer_mask[1],
- .StreamtoBufferSelects2 = buffer_mask[2],
- .StreamtoBufferSelects3 = buffer_mask[3],
- .NumEntries0 = decls[0],
- .NumEntries1 = decls[1],
- .NumEntries2 = decls[2],
- .NumEntries3 = decls[3]);
-
- for (int i = 0; i < max_decls; i++) {
- GENX(SO_DECL_ENTRY_pack)(
- brw, dw + 2 + i * 2,
- &(struct GENX(SO_DECL_ENTRY)) {
- .Stream0Decl = so_decl[0][i],
- .Stream1Decl = so_decl[1][i],
- .Stream2Decl = so_decl[2][i],
- .Stream3Decl = so_decl[3][i],
- });
- }
-}
-
-static void
-genX(upload_3dstate_so_buffers)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_TRANSFORM_FEEDBACK */
- struct gl_transform_feedback_object *xfb_obj =
- ctx->TransformFeedback.CurrentObject;
-#if GFX_VER < 8
- const struct gl_transform_feedback_info *linked_xfb_info =
- xfb_obj->program->sh.LinkedTransformFeedback;
-#else
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) xfb_obj;
-#endif
-
- /* Set up the up to 4 output buffers. These are the ranges defined in the
- * gl_transform_feedback_object.
- */
- for (int i = 0; i < 4; i++) {
- struct brw_buffer_object *bufferobj =
- brw_buffer_object(xfb_obj->Buffers[i]);
- uint32_t start = xfb_obj->Offset[i];
- uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
- uint32_t const size = end - start;
-
- if (!bufferobj || !size) {
- brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
- sob.SOBufferIndex = i;
- sob.MOCS = brw_mocs(&brw->isl_dev, NULL);
- }
- continue;
- }
-
- assert(start % 4 == 0);
- struct brw_bo *bo =
- brw_bufferobj_buffer(brw, bufferobj, start, size, true);
- assert(end <= bo->size);
-
- brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
- sob.SOBufferIndex = i;
-
- sob.SurfaceBaseAddress = rw_bo(bo, start);
- sob.MOCS = brw_mocs(&brw->isl_dev, bo);
-#if GFX_VER < 8
- sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4;
- sob.SurfaceEndAddress = rw_bo(bo, end);
-#else
- sob.SOBufferEnable = true;
- sob.StreamOffsetWriteEnable = true;
- sob.StreamOutputBufferOffsetAddressEnable = true;
-
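-         /* SurfaceSize is expressed in DWords minus one, hence the divide
-          * by four and the clamp to at least one DWord.
-          */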
- sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1;
- sob.StreamOutputBufferOffsetAddress =
- rw_bo(brw_obj->offset_bo, i * sizeof(uint32_t));
-
- if (brw_obj->zero_offsets) {
- /* Zero out the offset and write that to offset_bo */
- sob.StreamOffset = 0;
- } else {
- /* Use offset_bo as the "Stream Offset." */
- sob.StreamOffset = 0xFFFFFFFF;
- }
-#endif
- }
- }
-
-#if GFX_VER >= 8
- brw_obj->zero_offsets = false;
-#endif
-}
-
-static bool
-query_active(struct gl_query_object *q)
-{
- return q && q->Active;
-}
-
-static void
-genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
- const struct brw_vue_map *vue_map)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_TRANSFORM_FEEDBACK */
- struct gl_transform_feedback_object *xfb_obj =
- ctx->TransformFeedback.CurrentObject;
-
- brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
- if (active) {
- int urb_entry_read_offset = 0;
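-         /* The URB read length is in 256-bit units; VUE slots are 128 bits
-          * each, so divide the slot count by two, rounding up.
-          */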
- int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
- urb_entry_read_offset;
-
- sos.SOFunctionEnable = true;
- sos.SOStatisticsEnable = true;
-
- /* BRW_NEW_RASTERIZER_DISCARD */
- if (ctx->RasterDiscard) {
- if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
- sos.RenderingDisable = true;
- } else {
- perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
- "query active relies on the clipper.\n");
- }
- }
-
- /* _NEW_LIGHT */
- if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
- sos.ReorderMode = TRAILING;
-
-#if GFX_VER < 8
- sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
- sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
- sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
- sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
-#else
- const struct gl_transform_feedback_info *linked_xfb_info =
- xfb_obj->program->sh.LinkedTransformFeedback;
- /* Set buffer pitches; 0 means unbound. */
- if (xfb_obj->Buffers[0])
- sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
- if (xfb_obj->Buffers[1])
- sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
- if (xfb_obj->Buffers[2])
- sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
- if (xfb_obj->Buffers[3])
- sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
-#endif
-
- /* We always read the whole vertex. This could be reduced at some
- * point by reading less and offsetting the register index in the
- * SO_DECLs.
- */
- sos.Stream0VertexReadOffset = urb_entry_read_offset;
- sos.Stream0VertexReadLength = urb_entry_read_length - 1;
- sos.Stream1VertexReadOffset = urb_entry_read_offset;
- sos.Stream1VertexReadLength = urb_entry_read_length - 1;
- sos.Stream2VertexReadOffset = urb_entry_read_offset;
- sos.Stream2VertexReadLength = urb_entry_read_length - 1;
- sos.Stream3VertexReadOffset = urb_entry_read_offset;
- sos.Stream3VertexReadLength = urb_entry_read_length - 1;
- }
- }
-}
-
-static void
-genX(upload_sol)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_TRANSFORM_FEEDBACK */
- bool active = _mesa_is_xfb_active_and_unpaused(ctx);
-
- if (active) {
- genX(upload_3dstate_so_buffers)(brw);
-
- /* BRW_NEW_VUE_MAP_GEOM_OUT */
- genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
- }
-
- /* Finally, set up the SOL stage. This command must always follow updates to
- * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
-    * MMIO register updates (currently performed by the kernel at each batch
- * emit).
- */
- genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
-}
-
-static const struct brw_tracked_state genX(sol_state) = {
- .dirty = {
- .mesa = _NEW_LIGHT,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_RASTERIZER_DISCARD |
- BRW_NEW_VUE_MAP_GEOM_OUT |
- BRW_NEW_TRANSFORM_FEEDBACK,
- },
- .emit = genX(upload_sol),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_ps)(struct brw_context *brw)
-{
- UNUSED const struct gl_context *ctx = &brw->ctx;
- UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
- const struct brw_stage_state *stage_state = &brw->wm.base;
-
- brw_batch_emit(brw, GENX(3DSTATE_PS), ps) {
- /* Initialize the execution mask with VMask. Otherwise, derivatives are
- * incorrect for subspans where some of the pixels are unlit. We believe
- * the bit just didn't take effect in previous generations.
- */
- ps.VectorMaskEnable = GFX_VER >= 8;
-
- /* Wa_1606682166:
- * "Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes.
- * Disable the Sampler state prefetch functionality in the SARB by
- * programming 0xB000[30] to '1'."
- */
- ps.SamplerCount = GFX_VER == 11 ?
- 0 : DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
-
- /* BRW_NEW_FS_PROG_DATA */
- ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
-
- if (prog_data->base.use_alt_mode)
- ps.FloatingPointMode = Alternate;
-
- /* Haswell requires the sample mask to be set in this packet as well as
- * in 3DSTATE_SAMPLE_MASK; the values should match.
- */
-
- /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#if GFX_VERx10 == 75
-      ps.SampleMask = genX(determine_sample_mask)(brw);
-#endif
-
- /* 3DSTATE_PS expects the number of threads per PSD, which is always 64
-       * for pre-Gfx11 and 128 for Gfx11+. On Gfx11+, a programmed value of
-       * k implies 2*(k+1) threads. It implicitly scales for different GT
- * levels (which have some # of PSDs).
- *
- * In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
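-       * (i.e. the field below is programmed as threads - 1 on Gfx9+ and
-       * threads - 2 on Gfx8).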
- */
-#if GFX_VER >= 9
- ps.MaximumNumberofThreadsPerPSD = 64 - 1;
-#elif GFX_VER >= 8
- ps.MaximumNumberofThreadsPerPSD = 64 - 2;
-#else
- ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-#endif
-
- if (prog_data->base.nr_params > 0 ||
- prog_data->base.ubo_ranges[0].length > 0)
- ps.PushConstantEnable = true;
-
-#if GFX_VER < 8
- /* From the IVB PRM, volume 2 part 1, page 287:
- * "This bit is inserted in the PS payload header and made available to
- * the DataPort (either via the message header or via header bypass) to
- * indicate that oMask data (one or two phases) is included in Render
- * Target Write messages. If present, the oMask data is used to mask off
- * samples."
- */
- ps.oMaskPresenttoRenderTarget = prog_data->uses_omask;
-
- /* The hardware wedges if you have this bit set but don't turn on any
- * dual source blend factors.
- *
- * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
- */
- ps.DualSourceBlendEnable = prog_data->dual_src_blend &&
- (ctx->Color.BlendEnabled & 1) &&
- ctx->Color._BlendUsesDualSrc & 0x1;
-
- /* BRW_NEW_FS_PROG_DATA */
- ps.AttributeEnable = (prog_data->num_varying_inputs != 0);
-#endif
-
- /* From the documentation for this packet:
- * "If the PS kernel does not need the Position XY Offsets to
- * compute a Position Value, then this field should be programmed
- * to POSOFFSET_NONE."
- *
- * "SW Recommendation: If the PS kernel needs the Position Offsets
- * to compute a Position XY value, this field should match Position
- * ZW Interpolation Mode to ensure a consistent position.xyzw
- * computation."
- *
- * We only require XY sample offsets. So, this recommendation doesn't
- * look useful at the moment. We might need this in future.
- */
- if (prog_data->uses_pos_offset)
- ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
- else
- ps.PositionXYOffsetSelect = POSOFFSET_NONE;
-
- ps._8PixelDispatchEnable = prog_data->dispatch_8;
- ps._16PixelDispatchEnable = prog_data->dispatch_16;
- ps._32PixelDispatchEnable = prog_data->dispatch_32;
-
- /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
- *
- * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
- * Dispatch must not be enabled for PER_PIXEL dispatch mode."
- *
- * Since 16x MSAA is first introduced on SKL, we don't need to apply
- * the workaround on any older hardware.
- *
- * BRW_NEW_NUM_SAMPLES
- */
- if (GFX_VER >= 9 && !prog_data->persample_dispatch &&
- brw->num_samples == 16) {
- assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
- ps._32PixelDispatchEnable = false;
- }
-
- ps.DispatchGRFStartRegisterForConstantSetupData0 =
- brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
- ps.DispatchGRFStartRegisterForConstantSetupData1 =
- brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
- ps.DispatchGRFStartRegisterForConstantSetupData2 =
- brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
-
- ps.KernelStartPointer0 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(prog_data, ps, 0);
- ps.KernelStartPointer1 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(prog_data, ps, 1);
- ps.KernelStartPointer2 = stage_state->prog_offset +
- brw_wm_prog_data_prog_offset(prog_data, ps, 2);
-
- if (prog_data->base.total_scratch) {
- ps.ScratchSpaceBasePointer =
- rw_32_bo(stage_state->scratch_bo,
- ffs(stage_state->per_thread_scratch) - 11);
- }
- }
-}
-
-static const struct brw_tracked_state genX(ps_state) = {
- .dirty = {
- .mesa = _NEW_MULTISAMPLE |
- (GFX_VER < 8 ? _NEW_BUFFERS |
- _NEW_COLOR
- : 0),
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- (GFX_VER >= 9 ? BRW_NEW_NUM_SAMPLES : 0),
- },
- .emit = genX(upload_ps),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_hs_state)(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct brw_stage_state *stage_state = &brw->tcs.base;
- struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
- const struct brw_vue_prog_data *vue_prog_data =
- brw_vue_prog_data(stage_prog_data);
-
-   /* BRW_NEW_TCS_PROG_DATA */
- struct brw_tcs_prog_data *tcs_prog_data =
- brw_tcs_prog_data(stage_prog_data);
-
- if (!tcs_prog_data) {
- brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
- } else {
- brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
- INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
-
- hs.InstanceCount = tcs_prog_data->instances - 1;
- hs.IncludeVertexHandles = true;
-
- hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
-
-#if GFX_VER >= 9
- hs.DispatchMode = vue_prog_data->dispatch_mode;
- hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
-#endif
- }
- }
-}
-
-static const struct brw_tracked_state genX(hs_state) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = genX(upload_hs_state),
-};
-
-static void
-genX(upload_ds_state)(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct brw_stage_state *stage_state = &brw->tes.base;
- struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
-
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_tes_prog_data *tes_prog_data =
- brw_tes_prog_data(stage_prog_data);
- const struct brw_vue_prog_data *vue_prog_data =
- brw_vue_prog_data(stage_prog_data);
-
- if (!tes_prog_data) {
- brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
- } else {
- assert(GFX_VER < 11 ||
- vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
-
- brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
- INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
-
- ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
- ds.ComputeWCoordinateEnable =
- tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
-
-#if GFX_VER >= 8
- if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
- ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
- ds.UserClipDistanceCullTestEnableBitmask =
- vue_prog_data->cull_distance_mask;
-#endif
- }
- }
-}
-
-static const struct brw_tracked_state genX(ds_state) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_TESS_PROGRAMS |
- BRW_NEW_TES_PROG_DATA,
- },
- .emit = genX(upload_ds_state),
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-upload_te_state(struct brw_context *brw)
-{
- /* BRW_NEW_TESS_PROGRAMS */
- bool active = brw->programs[MESA_SHADER_TESS_EVAL];
-
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_tes_prog_data *tes_prog_data =
- brw_tes_prog_data(brw->tes.base.prog_data);
-
- if (active) {
- brw_batch_emit(brw, GENX(3DSTATE_TE), te) {
- te.Partitioning = tes_prog_data->partitioning;
- te.OutputTopology = tes_prog_data->output_topology;
- te.TEDomain = tes_prog_data->domain;
- te.TEEnable = true;
- te.MaximumTessellationFactorOdd = 63.0;
- te.MaximumTessellationFactorNotOdd = 64.0;
- }
- } else {
- brw_batch_emit(brw, GENX(3DSTATE_TE), te);
- }
-}
-
-static const struct brw_tracked_state genX(te_state) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = upload_te_state,
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_tes_push_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->tes.base;
- /* BRW_NEW_TESS_PROGRAMS */
- const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
-
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
- gfx6_upload_push_constants(brw, tep, prog_data, stage_state);
-}
-
-static const struct brw_tracked_state genX(tes_push_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_TESS_PROGRAMS |
- BRW_NEW_TES_PROG_DATA,
- },
- .emit = genX(upload_tes_push_constants),
-};
-
-static void
-genX(upload_tcs_push_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->tcs.base;
- /* BRW_NEW_TESS_PROGRAMS */
- const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
-
- /* BRW_NEW_TCS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
-
- gfx6_upload_push_constants(brw, tcp, prog_data, stage_state);
-}
-
-static const struct brw_tracked_state genX(tcs_push_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_DEFAULT_TESS_LEVELS |
- BRW_NEW_TESS_PROGRAMS |
- BRW_NEW_TCS_PROG_DATA,
- },
- .emit = genX(upload_tcs_push_constants),
-};
-
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_cs_push_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->cs.base;
-
- /* BRW_NEW_COMPUTE_PROGRAM */
- const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];
-
- if (cp) {
- /* BRW_NEW_CS_PROG_DATA */
- struct brw_cs_prog_data *cs_prog_data =
- brw_cs_prog_data(brw->cs.base.prog_data);
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
- brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state);
- }
-}
-
-const struct brw_tracked_state genX(cs_push_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_COMPUTE_PROGRAM |
- BRW_NEW_CS_PROG_DATA,
- },
- .emit = genX(upload_cs_push_constants),
-};
-
-/**
- * Creates a new CS constant buffer reflecting the current CS program's
- * constants, if needed by the CS program.
- */
-static void
-genX(upload_cs_pull_constants)(struct brw_context *brw)
-{
- struct brw_stage_state *stage_state = &brw->cs.base;
-
- /* BRW_NEW_COMPUTE_PROGRAM */
- struct brw_program *cp =
- (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
-
- /* BRW_NEW_CS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data;
-
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
- /* _NEW_PROGRAM_CONSTANTS */
- brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program,
- stage_state, prog_data);
-}
-
-const struct brw_tracked_state genX(cs_pull_constants) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_COMPUTE_PROGRAM |
- BRW_NEW_CS_PROG_DATA,
- },
- .emit = genX(upload_cs_pull_constants),
-};
-
-static void
-genX(upload_cs_state)(struct brw_context *brw)
-{
- if (!brw->cs.base.prog_data)
- return;
-
- uint32_t offset;
- uint32_t *desc = (uint32_t*) brw_state_batch(
- brw, GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t), 64,
- &offset);
-
- struct brw_stage_state *stage_state = &brw->cs.base;
- struct brw_stage_prog_data *prog_data = stage_state->prog_data;
- struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- const struct brw_cs_dispatch_info dispatch =
- brw_cs_get_dispatch_info(devinfo, cs_prog_data, brw->compute.group_size);
-
- if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
- brw_emit_buffer_surface_state(
- brw, &stage_state->surf_offset[
- prog_data->binding_table.shader_time_start],
- brw->shader_time.bo, 0, ISL_FORMAT_RAW,
- brw->shader_time.bo->size, 1,
- RELOC_WRITE);
- }
-
- uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
- 32, &stage_state->bind_bo_offset);
-
- /* The MEDIA_VFE_STATE documentation for Gfx8+ says:
- *
- * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
- * the only bits that are changed are scoreboard related: Scoreboard
- * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
- * these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
- *
- * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
- * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
-
- brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
- if (prog_data->total_scratch) {
- uint32_t per_thread_scratch_value;
-
- if (GFX_VER >= 8) {
- /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
- * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
- */
- per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11;
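-            /* e.g. a 4kB per-thread allocation gives ffs(4096) - 11 = 2. */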
- } else if (GFX_VERx10 == 75) {
- /* Haswell's Per Thread Scratch Space is in the range [0, 10]
- * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
- */
- per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12;
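-            /* e.g. a 4kB per-thread allocation gives ffs(4096) - 12 = 1. */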
- } else {
- /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
- * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
- */
- per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1;
- }
- vfe.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0);
- vfe.PerThreadScratchSpace = per_thread_scratch_value;
- }
-
- vfe.MaximumNumberofThreads =
- devinfo->max_cs_threads * devinfo->subslice_total - 1;
- vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0;
-#if GFX_VER < 11
- vfe.ResetGatewayTimer =
- Resettingrelativetimerandlatchingtheglobaltimestamp;
-#endif
-#if GFX_VER < 9
- vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
-#endif
-#if GFX_VER == 7
- vfe.GPGPUMode = true;
-#endif
-
- /* We are uploading duplicated copies of push constant uniforms for each
-       * thread. Although the local id data needs to vary per thread, the rest
-       * of the uniform data does not change. Unfortunately this duplication is
- * required for gfx7. As of Haswell, this duplication can be avoided,
- * but this older mechanism with duplicated data continues to work.
- *
- * FINISHME: As of Haswell, we could make use of the
- * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length"
- * field to only store one copy of uniform data.
- *
- * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
- * which is described in the GPGPU_WALKER command and in the Broadwell
- * PRM Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
- * Operations => GPGPU Mode => Indirect Payload Storage.
- *
- * Note: The constant data is built in brw_upload_cs_push_constants
- * below.
- */
- vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0;
-
- const uint32_t vfe_curbe_allocation =
- ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
- cs_prog_data->push.cross_thread.regs, 2);
- vfe.CURBEAllocationSize = vfe_curbe_allocation;
- }
-
- const unsigned push_const_size =
- brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
- if (push_const_size > 0) {
- brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
- curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
- curbe.CURBEDataStartAddress = stage_state->push_const_offset;
- }
- }
-
- /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
- memcpy(bind, stage_state->surf_offset,
- prog_data->binding_table.size_bytes);
- const uint64_t ksp = brw->cs.base.prog_offset +
- brw_cs_prog_data_prog_offset(cs_prog_data,
- dispatch.simd_size);
- const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
- .KernelStartPointer = ksp,
- .SamplerStatePointer = stage_state->sampler_offset,
- /* Wa_1606682166 */
- .SamplerCount = GFX_VER == 11 ? 0 :
- DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
- .BindingTablePointer = stage_state->bind_bo_offset,
- .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
- .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
- .SharedLocalMemorySize = encode_slm_size(GFX_VER,
- prog_data->total_shared),
- .BarrierEnable = cs_prog_data->uses_barrier,
-#if GFX_VERx10 >= 75
- .CrossThreadConstantDataReadLength =
- cs_prog_data->push.cross_thread.regs,
-#endif
- };
-
- GENX(INTERFACE_DESCRIPTOR_DATA_pack)(brw, desc, &idd);
-
- brw_batch_emit(brw, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
- load.InterfaceDescriptorTotalLength =
- GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
- load.InterfaceDescriptorDataStartAddress = offset;
- }
-}
-
-static const struct brw_tracked_state genX(cs_state) = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_CS_PROG_DATA |
- BRW_NEW_SAMPLER_STATE_TABLE |
- BRW_NEW_SURFACES,
- },
- .emit = genX(upload_cs_state)
-};
-
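-/* MMIO register offsets loaded below via MI_LOAD_REGISTER_MEM/IMM to set up
- * the indirect GPGPU dispatch dimensions and the MI_PREDICATE sources.
- */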
-#define GPGPU_DISPATCHDIMX 0x2500
-#define GPGPU_DISPATCHDIMY 0x2504
-#define GPGPU_DISPATCHDIMZ 0x2508
-
-#define MI_PREDICATE_SRC0 0x2400
-#define MI_PREDICATE_SRC1 0x2408
-
-static void
-prepare_indirect_gpgpu_walker(struct brw_context *brw)
-{
- GLintptr indirect_offset = brw->compute.num_work_groups_offset;
- struct brw_bo *bo = brw->compute.num_work_groups_bo;
-
- emit_lrm(brw, GPGPU_DISPATCHDIMX, ro_bo(bo, indirect_offset + 0));
- emit_lrm(brw, GPGPU_DISPATCHDIMY, ro_bo(bo, indirect_offset + 4));
- emit_lrm(brw, GPGPU_DISPATCHDIMZ, ro_bo(bo, indirect_offset + 8));
-
-#if GFX_VER <= 7
- /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
- emit_lri(brw, MI_PREDICATE_SRC0 + 4, 0);
- emit_lri(brw, MI_PREDICATE_SRC1 , 0);
- emit_lri(brw, MI_PREDICATE_SRC1 + 4, 0);
-
- /* Load compute_dispatch_indirect_x_size into SRC0 */
- emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 0));
-
- /* predicate = (compute_dispatch_indirect_x_size == 0); */
- brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
- mip.LoadOperation = LOAD_LOAD;
- mip.CombineOperation = COMBINE_SET;
- mip.CompareOperation = COMPARE_SRCS_EQUAL;
- }
-
- /* Load compute_dispatch_indirect_y_size into SRC0 */
- emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 4));
-
- /* predicate |= (compute_dispatch_indirect_y_size == 0); */
- brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
- mip.LoadOperation = LOAD_LOAD;
- mip.CombineOperation = COMBINE_OR;
- mip.CompareOperation = COMPARE_SRCS_EQUAL;
- }
-
- /* Load compute_dispatch_indirect_z_size into SRC0 */
- emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 8));
-
- /* predicate |= (compute_dispatch_indirect_z_size == 0); */
- brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
- mip.LoadOperation = LOAD_LOAD;
- mip.CombineOperation = COMBINE_OR;
- mip.CompareOperation = COMPARE_SRCS_EQUAL;
- }
-
- /* predicate = !predicate; */
-#define COMPARE_FALSE 1
- brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
- mip.LoadOperation = LOAD_LOADINV;
- mip.CombineOperation = COMBINE_OR;
- mip.CompareOperation = COMPARE_FALSE;
- }
-#endif
-}
-
-static void
-genX(emit_gpgpu_walker)(struct brw_context *brw)
-{
- const GLuint *num_groups = brw->compute.num_work_groups;
-
- bool indirect = brw->compute.num_work_groups_bo != NULL;
- if (indirect)
- prepare_indirect_gpgpu_walker(brw);
-
- const struct brw_cs_dispatch_info dispatch =
- brw_cs_get_dispatch_info(&brw->screen->devinfo,
- brw_cs_prog_data(brw->cs.base.prog_data),
- brw->compute.group_size);
-
- brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) {
- ggw.IndirectParameterEnable = indirect;
- ggw.PredicateEnable = GFX_VER <= 7 && indirect;
- ggw.SIMDSize = dispatch.simd_size / 16;
- ggw.ThreadDepthCounterMaximum = 0;
- ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
- ggw.ThreadGroupIDXDimension = num_groups[0];
- ggw.ThreadGroupIDYDimension = num_groups[1];
- ggw.ThreadGroupIDZDimension = num_groups[2];
- ggw.RightExecutionMask = dispatch.right_mask;
- ggw.BottomExecutionMask = 0xffffffff;
- }
-
- brw_batch_emit(brw, GENX(MEDIA_STATE_FLUSH), msf);
-}
-
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-static void
-genX(upload_raster)(struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_BUFFERS */
- const bool flip_y = ctx->DrawBuffer->FlipY;
-
- /* _NEW_POLYGON */
- const struct gl_polygon_attrib *polygon = &ctx->Polygon;
-
- /* _NEW_POINT */
- const struct gl_point_attrib *point = &ctx->Point;
-
- brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
- if (brw->polygon_front_bit != flip_y)
- raster.FrontWinding = CounterClockwise;
-
- if (polygon->CullFlag) {
- switch (polygon->CullFaceMode) {
- case GL_FRONT:
- raster.CullMode = CULLMODE_FRONT;
- break;
- case GL_BACK:
- raster.CullMode = CULLMODE_BACK;
- break;
- case GL_FRONT_AND_BACK:
- raster.CullMode = CULLMODE_BOTH;
- break;
- default:
- unreachable("not reached");
- }
- } else {
- raster.CullMode = CULLMODE_NONE;
- }
-
- raster.SmoothPointEnable = point->SmoothFlag;
-
- raster.DXMultisampleRasterizationEnable =
- _mesa_is_multisample_enabled(ctx);
-
- raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill;
- raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine;
- raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint;
-
- switch (polygon->FrontMode) {
- case GL_FILL:
- raster.FrontFaceFillMode = FILL_MODE_SOLID;
- break;
- case GL_LINE:
- raster.FrontFaceFillMode = FILL_MODE_WIREFRAME;
- break;
- case GL_POINT:
- raster.FrontFaceFillMode = FILL_MODE_POINT;
- break;
- default:
- unreachable("not reached");
- }
-
- switch (polygon->BackMode) {
- case GL_FILL:
- raster.BackFaceFillMode = FILL_MODE_SOLID;
- break;
- case GL_LINE:
- raster.BackFaceFillMode = FILL_MODE_WIREFRAME;
- break;
- case GL_POINT:
- raster.BackFaceFillMode = FILL_MODE_POINT;
- break;
- default:
- unreachable("not reached");
- }
-
- /* _NEW_LINE */
- raster.AntialiasingEnable = ctx->Line.SmoothFlag;
-
-#if GFX_VER == 10
- /* _NEW_BUFFERS
- * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
- */
- const bool multisampled_fbo =
- _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- if (multisampled_fbo)
- raster.AntialiasingEnable = false;
-#endif
-
- /* _NEW_SCISSOR */
- raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
-
- /* _NEW_TRANSFORM */
-#if GFX_VER < 9
- if (!(ctx->Transform.DepthClampNear &&
- ctx->Transform.DepthClampFar))
- raster.ViewportZClipTestEnable = true;
-#endif
-
-#if GFX_VER >= 9
- if (!ctx->Transform.DepthClampNear)
- raster.ViewportZNearClipTestEnable = true;
-
- if (!ctx->Transform.DepthClampFar)
- raster.ViewportZFarClipTestEnable = true;
-#endif
-
- /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
-#if GFX_VER >= 9
- raster.ConservativeRasterizationEnable =
- ctx->IntelConservativeRasterization;
-#endif
-
- raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
- raster.GlobalDepthOffsetScale = polygon->OffsetFactor;
-
- raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
- }
-}
-
-static const struct brw_tracked_state genX(raster_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_LINE |
- _NEW_MULTISAMPLE |
- _NEW_POINT |
- _NEW_POLYGON |
- _NEW_SCISSOR |
- _NEW_TRANSFORM,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_CONSERVATIVE_RASTERIZATION,
- },
- .emit = genX(upload_raster),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-static void
-genX(upload_ps_extra)(struct brw_context *brw)
-{
- UNUSED struct gl_context *ctx = &brw->ctx;
-
- const struct brw_wm_prog_data *prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
- psx.PixelShaderValid = true;
- psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
- psx.PixelShaderKillsPixel = prog_data->uses_kill;
- psx.AttributeEnable = prog_data->num_varying_inputs != 0;
- psx.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
- psx.PixelShaderUsesSourceW = prog_data->uses_src_w;
- psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
-
- /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
- if (prog_data->uses_sample_mask) {
-#if GFX_VER >= 9
- if (prog_data->post_depth_coverage)
- psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
- else if (prog_data->inner_coverage && ctx->IntelConservativeRasterization)
- psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
- else
- psx.InputCoverageMaskState = ICMS_NORMAL;
-#else
- psx.PixelShaderUsesInputCoverageMask = true;
-#endif
- }
-
- psx.oMaskPresenttoRenderTarget = prog_data->uses_omask;
-#if GFX_VER >= 9
- psx.PixelShaderPullsBary = prog_data->pulls_bary;
- psx.PixelShaderComputesStencil = prog_data->computed_stencil;
-#endif
-
- /* The stricter cross-primitive coherency guarantees that the hardware
- * gives us with the "Accesses UAV" bit set for at least one shader stage
- * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
- * are redundant within the current image, atomic counter and SSBO GL
- * APIs, which all have very loose ordering and coherency requirements
- * and generally rely on the application to insert explicit barriers when
- * a shader invocation is expected to see the memory writes performed by
- * the invocations of some previous primitive. Regardless of the value
- * of "UAV coherency required", the "Accesses UAV" bits will implicitly
-       * cause a DC flush (useless in most cases) when the lowermost stage with
- * the bit set finishes execution.
- *
- * It would be nice to disable it, but in some cases we can't because on
- * Gfx8+ it also has an influence on rasterization via the PS UAV-only
- * signal (which could be set independently from the coherency mechanism
- * in the 3DSTATE_WM command on Gfx7), and because in some cases it will
- * determine whether the hardware skips execution of the fragment shader
- * or not via the ThreadDispatchEnable signal. However if we know that
- * GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
- * GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
- * difference so we may just disable it here.
- *
- * Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't
- * take into account KillPixels when no depth or stencil writes are
- * enabled. In order for occlusion queries to work correctly with no
-       * attachments, we need to force-enable PixelShaderHasUAV here.
- *
- * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS |
- * _NEW_COLOR
- */
- if ((prog_data->has_side_effects || prog_data->uses_kill) &&
- !brw_color_buffer_write_enabled(brw))
- psx.PixelShaderHasUAV = true;
- }
-}
-
-const struct brw_tracked_state genX(ps_extra) = {
- .dirty = {
- .mesa = _NEW_BUFFERS | _NEW_COLOR,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_CONSERVATIVE_RASTERIZATION,
- },
- .emit = genX(upload_ps_extra),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-static void
-genX(upload_ps_blend)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- /* _NEW_BUFFERS */
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
- const bool buffer0_is_integer = ctx->DrawBuffer->_IntegerBuffers & 0x1;
-
- /* _NEW_COLOR */
- struct gl_colorbuffer_attrib *color = &ctx->Color;
-
- brw_batch_emit(brw, GENX(3DSTATE_PS_BLEND), pb) {
- /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
- pb.HasWriteableRT = brw_color_buffer_write_enabled(brw);
-
- bool alpha_to_one = false;
-
- if (!buffer0_is_integer) {
- /* _NEW_MULTISAMPLE */
-
- if (_mesa_is_multisample_enabled(ctx)) {
- pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage;
- alpha_to_one = ctx->Multisample.SampleAlphaToOne;
- }
-
- pb.AlphaTestEnable = color->AlphaEnabled;
- }
-
- /* Used for implementing the following bit of GL_EXT_texture_integer:
- * "Per-fragment operations that require floating-point color
- * components, including multisample alpha operations, alpha test,
- * blending, and dithering, have no effect when the corresponding
- * colors are written to an integer color buffer."
- *
- * The OpenGL specification 3.3 (page 196), section 4.1.3 says:
- * "If drawbuffer zero is not NONE and the buffer it references has an
- * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
- * operations are skipped."
- */
- if (rb && !buffer0_is_integer && (color->BlendEnabled & 1)) {
- GLenum eqRGB = color->Blend[0].EquationRGB;
- GLenum eqA = color->Blend[0].EquationA;
- GLenum srcRGB = color->Blend[0].SrcRGB;
- GLenum dstRGB = color->Blend[0].DstRGB;
- GLenum srcA = color->Blend[0].SrcA;
- GLenum dstA = color->Blend[0].DstA;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX)
- srcRGB = dstRGB = GL_ONE;
-
- if (eqA == GL_MIN || eqA == GL_MAX)
- srcA = dstA = GL_ONE;
-
- /* Due to hardware limitations, the destination may have information
- * in an alpha channel even when the format specifies no alpha
- * channel. In order to avoid getting any incorrect blending due to
- * that alpha channel, coerce the blend factors to values that will
- * not read the alpha channel, but will instead use the correct
- * implicit value for alpha.
- */
- if (!_mesa_base_format_has_channel(rb->_BaseFormat,
- GL_TEXTURE_ALPHA_TYPE)) {
- srcRGB = brw_fix_xRGB_alpha(srcRGB);
- srcA = brw_fix_xRGB_alpha(srcA);
- dstRGB = brw_fix_xRGB_alpha(dstRGB);
- dstA = brw_fix_xRGB_alpha(dstA);
- }
-
- /* Alpha to One doesn't work with Dual Color Blending. Override
- * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO.
- */
- if (alpha_to_one && color->_BlendUsesDualSrc & 0x1) {
- srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
- srcA = fix_dual_blend_alpha_to_one(srcA);
- dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
- dstA = fix_dual_blend_alpha_to_one(dstA);
- }
-
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
-
- /* The Dual Source Blending documentation says:
- *
- * "If SRC1 is included in a src/dst blend factor and
- * a DualSource RT Write message is not used, results
- * are UNDEFINED. (This reflects the same restriction in DX APIs,
- * where undefined results are produced if “o1” is not written
- * by a PS – there are no default values defined).
- * If SRC1 is not included in a src/dst blend factor,
- * dual source blending must be disabled."
- *
- * There is no way to gracefully fix this undefined situation
- * so we just disable the blending to prevent possible issues.
- */
- pb.ColorBufferBlendEnable =
- !(color->_BlendUsesDualSrc & 0x1) || wm_prog_data->dual_src_blend;
- pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
- pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
- pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB);
- pb.DestinationBlendFactor = brw_translate_blend_factor(dstRGB);
-
- pb.IndependentAlphaBlendEnable =
- srcA != srcRGB || dstA != dstRGB || eqA != eqRGB;
- }
- }
-}
-
-static const struct brw_tracked_state genX(ps_blend) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_COLOR |
- _NEW_MULTISAMPLE,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA,
- },
- .emit = genX(upload_ps_blend)
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-static void
-genX(emit_vf_topology)(struct brw_context *brw)
-{
- brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) {
- vftopo.PrimitiveTopologyType = brw->primitive;
- }
-}
-
-static const struct brw_tracked_state genX(vf_topology) = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_PRIMITIVE,
- },
- .emit = genX(emit_vf_topology),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(emit_mi_report_perf_count)(struct brw_context *brw,
- struct brw_bo *bo,
- uint32_t offset_in_bytes,
- uint32_t report_id)
-{
- brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
- mi_rpc.MemoryAddress = ggtt_bo(bo, offset_in_bytes);
- mi_rpc.ReportID = report_id;
- }
-}
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-/**
- * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet.
- */
-static void
-genX(emit_sampler_state_pointers_xs)(UNUSED struct brw_context *brw,
- UNUSED struct brw_stage_state *stage_state)
-{
-#if GFX_VER >= 7
- static const uint16_t packet_headers[] = {
- [MESA_SHADER_VERTEX] = 43,
- [MESA_SHADER_TESS_CTRL] = 44,
- [MESA_SHADER_TESS_EVAL] = 45,
- [MESA_SHADER_GEOMETRY] = 46,
- [MESA_SHADER_FRAGMENT] = 47,
- };
-
- /* Ivybridge requires a workaround flush before VS packets. */
- if (GFX_VERx10 == 70 &&
- stage_state->stage == MESA_SHADER_VERTEX) {
- gfx7_emit_vs_workaround_flush(brw);
- }
-
- brw_batch_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
- ptr._3DCommandSubOpcode = packet_headers[stage_state->stage];
- ptr.PointertoVSSamplerState = stage_state->sampler_offset;
- }
-#endif
-}
-
-UNUSED static bool
-has_component(mesa_format format, int i)
-{
- if (_mesa_is_format_color_format(format))
- return _mesa_format_has_color_component(format, i);
-
- /* depth and stencil have only one component */
- return i == 0;
-}
-
-/**
- * Upload SAMPLER_BORDER_COLOR_STATE.
- */
-static void
-genX(upload_default_color)(struct brw_context *brw,
- const struct gl_sampler_object *sampler,
- UNUSED mesa_format format,
- GLenum base_format,
- bool is_integer_format, bool is_stencil_sampling,
- uint32_t *sdc_offset)
-{
- union gl_color_union color;
-
- switch (base_format) {
- case GL_DEPTH_COMPONENT:
- /* GL specs that border color for depth textures is taken from the
- * R channel, while the hardware uses A. Spam R into all the
- * channels for safety.
- */
- color.ui[0] = sampler->Attrib.state.border_color.ui[0];
- color.ui[1] = sampler->Attrib.state.border_color.ui[0];
- color.ui[2] = sampler->Attrib.state.border_color.ui[0];
- color.ui[3] = sampler->Attrib.state.border_color.ui[0];
- break;
- case GL_ALPHA:
- color.ui[0] = 0u;
- color.ui[1] = 0u;
- color.ui[2] = 0u;
- color.ui[3] = sampler->Attrib.state.border_color.ui[3];
- break;
- case GL_INTENSITY:
- color.ui[0] = sampler->Attrib.state.border_color.ui[0];
- color.ui[1] = sampler->Attrib.state.border_color.ui[0];
- color.ui[2] = sampler->Attrib.state.border_color.ui[0];
- color.ui[3] = sampler->Attrib.state.border_color.ui[0];
- break;
- case GL_LUMINANCE:
- color.ui[0] = sampler->Attrib.state.border_color.ui[0];
- color.ui[1] = sampler->Attrib.state.border_color.ui[0];
- color.ui[2] = sampler->Attrib.state.border_color.ui[0];
- color.ui[3] = float_as_int(1.0);
- break;
- case GL_LUMINANCE_ALPHA:
- color.ui[0] = sampler->Attrib.state.border_color.ui[0];
- color.ui[1] = sampler->Attrib.state.border_color.ui[0];
- color.ui[2] = sampler->Attrib.state.border_color.ui[0];
- color.ui[3] = sampler->Attrib.state.border_color.ui[3];
- break;
- default:
- color.ui[0] = sampler->Attrib.state.border_color.ui[0];
- color.ui[1] = sampler->Attrib.state.border_color.ui[1];
- color.ui[2] = sampler->Attrib.state.border_color.ui[2];
- color.ui[3] = sampler->Attrib.state.border_color.ui[3];
- break;
- }
-
- /* In some cases we use an RGBA surface format for GL RGB textures,
- * where we've initialized the A channel to 1.0. We also have to set
- * the border color alpha to 1.0 in that case.
- */
- if (base_format == GL_RGB)
- color.ui[3] = float_as_int(1.0);
-
- int alignment = 32;
- if (GFX_VER >= 8) {
- alignment = 64;
- } else if (GFX_VERx10 == 75 && (is_integer_format || is_stencil_sampling)) {
- alignment = 512;
- }
-
- uint32_t *sdc = brw_state_batch(
- brw, GENX(SAMPLER_BORDER_COLOR_STATE_length) * sizeof(uint32_t),
- alignment, sdc_offset);
-
- struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 };
-
-#define ASSIGN(dst, src) \
- do { \
- dst = src; \
- } while (0)
-
-#define ASSIGNu16(dst, src) \
- do { \
- dst = (uint16_t)src; \
- } while (0)
-
-#define ASSIGNu8(dst, src) \
- do { \
- dst = (uint8_t)src; \
- } while (0)
-
-#define BORDER_COLOR_ATTR(macro, _color_type, src) \
- macro(state.BorderColor ## _color_type ## Red, src[0]); \
- macro(state.BorderColor ## _color_type ## Green, src[1]); \
- macro(state.BorderColor ## _color_type ## Blue, src[2]); \
- macro(state.BorderColor ## _color_type ## Alpha, src[3]);
-
-#if GFX_VER >= 8
- /* On Broadwell, the border color is represented as four 32-bit floats,
- * integers, or unsigned values, interpreted according to the surface
- * format. This matches the sampler->BorderColor union exactly; just
- * memcpy the values.
- */
- BORDER_COLOR_ATTR(ASSIGN, 32bit, color.ui);
-#elif GFX_VERx10 == 75
- if (is_integer_format || is_stencil_sampling) {
- bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling;
- const int bits_per_channel =
- _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS);
-
- /* From the Haswell PRM, "Command Reference: Structures", Page 36:
- * "If any color channel is missing from the surface format,
- * corresponding border color should be programmed as zero and if
- * alpha channel is missing, corresponding Alpha border color should
- * be programmed as 1."
- */
- unsigned c[4] = { 0, 0, 0, 1 };
- for (int i = 0; i < 4; i++) {
- if (has_component(format, i))
- c[i] = color.ui[i];
- }
-
- switch (bits_per_channel) {
- case 8:
- /* Copy RGBA in order. */
- BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c);
- break;
- case 10:
- /* R10G10B10A2_UINT is treated like a 16-bit format. */
- case 16:
- BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c);
- break;
- case 32:
- if (base_format == GL_RG) {
- /* Careful inspection of the tables reveals that for RG32 formats,
- * the green channel needs to go where blue normally belongs.
- */
- state.BorderColor32bitRed = c[0];
- state.BorderColor32bitBlue = c[1];
- state.BorderColor32bitAlpha = 1;
- } else {
- /* Copy RGBA in order. */
- BORDER_COLOR_ATTR(ASSIGN, 32bit, c);
- }
- break;
- default:
- assert(!"Invalid number of bits per channel in integer format.");
- break;
- }
- } else {
- BORDER_COLOR_ATTR(ASSIGN, Float, color.f);
- }
-#elif GFX_VER == 5 || GFX_VER == 6
- BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color.f);
- BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color.f);
- BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color.f);
-
-#define MESA_FLOAT_TO_HALF(dst, src) \
- dst = _mesa_float_to_half(src);
-
- BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color.f);
-
-#undef MESA_FLOAT_TO_HALF
-
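- /* An SNORM8 border color channel is just the high byte of the
-  * corresponding SNORM16 value, so it can be derived with a shift instead
-  * of converting again from the float color.
-  */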
- state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8;
- state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8;
- state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8;
- state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8;
-
- BORDER_COLOR_ATTR(ASSIGN, Float, color.f);
-#elif GFX_VER == 4
- BORDER_COLOR_ATTR(ASSIGN, , color.f);
-#else
- BORDER_COLOR_ATTR(ASSIGN, Float, color.f);
-#endif
-
-#undef ASSIGN
-#undef BORDER_COLOR_ATTR
-
- GENX(SAMPLER_BORDER_COLOR_STATE_pack)(brw, sdc, &state);
-}
-
-static uint32_t
-translate_wrap_mode(GLenum wrap, UNUSED bool using_nearest)
-{
- switch (wrap) {
- case GL_REPEAT:
- return TCM_WRAP;
- case GL_CLAMP:
-#if GFX_VER >= 8
- /* GL_CLAMP is the weird mode where coordinates are clamped to
- * [0.0, 1.0], so linear filtering of coordinates outside of
- * [0.0, 1.0] gives you half edge texel value and half border
- * color.
- *
- * Gfx8+ supports this natively.
- */
- return TCM_HALF_BORDER;
-#else
- /* On Gfx4-7.5, we clamp the coordinates in the fragment shader
- * and set clamp_border here, which gets the desired result.
- * We just use clamp(_to_edge) for nearest, because for nearest
- * clamping to 1.0 gives border color instead of the desired
- * edge texels.
- */
- if (using_nearest)
- return TCM_CLAMP;
- else
- return TCM_CLAMP_BORDER;
-#endif
- case GL_CLAMP_TO_EDGE:
- return TCM_CLAMP;
- case GL_CLAMP_TO_BORDER:
- return TCM_CLAMP_BORDER;
- case GL_MIRRORED_REPEAT:
- return TCM_MIRROR;
- case GL_MIRROR_CLAMP_TO_EDGE:
- return TCM_MIRROR_ONCE;
- default:
- return TCM_WRAP;
- }
-}
-
-/**
- * Return true if the given wrap mode requires the border color to exist.
- */
-static bool
-wrap_mode_needs_border_color(unsigned wrap_mode)
-{
-#if GFX_VER >= 8
- return wrap_mode == TCM_CLAMP_BORDER ||
- wrap_mode == TCM_HALF_BORDER;
-#else
- return wrap_mode == TCM_CLAMP_BORDER;
-#endif
-}
-
-/**
- * Sets the sampler state for a single unit based off of the sampler key
- * entry.
- */
-static void
-genX(update_sampler_state)(struct brw_context *brw,
- GLenum target, bool tex_cube_map_seamless,
- GLfloat tex_unit_lod_bias,
- mesa_format format, GLenum base_format,
- const struct gl_texture_object *texObj,
- const struct gl_sampler_object *sampler,
- uint32_t *sampler_state)
-{
- struct GENX(SAMPLER_STATE) samp_st = { 0 };
-
- /* Select min and mip filters. */
- switch (sampler->Attrib.MinFilter) {
- case GL_NEAREST:
- samp_st.MinModeFilter = MAPFILTER_NEAREST;
- samp_st.MipModeFilter = MIPFILTER_NONE;
- break;
- case GL_LINEAR:
- samp_st.MinModeFilter = MAPFILTER_LINEAR;
- samp_st.MipModeFilter = MIPFILTER_NONE;
- break;
- case GL_NEAREST_MIPMAP_NEAREST:
- samp_st.MinModeFilter = MAPFILTER_NEAREST;
- samp_st.MipModeFilter = MIPFILTER_NEAREST;
- break;
- case GL_LINEAR_MIPMAP_NEAREST:
- samp_st.MinModeFilter = MAPFILTER_LINEAR;
- samp_st.MipModeFilter = MIPFILTER_NEAREST;
- break;
- case GL_NEAREST_MIPMAP_LINEAR:
- samp_st.MinModeFilter = MAPFILTER_NEAREST;
- samp_st.MipModeFilter = MIPFILTER_LINEAR;
- break;
- case GL_LINEAR_MIPMAP_LINEAR:
- samp_st.MinModeFilter = MAPFILTER_LINEAR;
- samp_st.MipModeFilter = MIPFILTER_LINEAR;
- break;
- default:
- unreachable("not reached");
- }
-
- /* Select mag filter. */
- samp_st.MagModeFilter = sampler->Attrib.MagFilter == GL_LINEAR ?
- MAPFILTER_LINEAR : MAPFILTER_NEAREST;
-
- /* Enable anisotropic filtering if desired. */
- samp_st.MaximumAnisotropy = RATIO21;
-
- if (sampler->Attrib.MaxAnisotropy > 1.0f) {
- if (samp_st.MinModeFilter == MAPFILTER_LINEAR)
- samp_st.MinModeFilter = MAPFILTER_ANISOTROPIC;
- if (samp_st.MagModeFilter == MAPFILTER_LINEAR)
- samp_st.MagModeFilter = MAPFILTER_ANISOTROPIC;
-
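- /* The hardware encodes the ratio in steps of 2 starting at 2:1, so, for
-  * example, a GL max anisotropy of 16.0 maps to (16 - 2) / 2 = 7 and is
-  * capped at RATIO161 (16:1).
-  */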
- if (sampler->Attrib.MaxAnisotropy > 2.0f) {
- samp_st.MaximumAnisotropy =
- MIN2((sampler->Attrib.MaxAnisotropy - 2) / 2, RATIO161);
- }
- }
-
- /* Set address rounding bits if not using nearest filtering. */
- if (samp_st.MinModeFilter != MAPFILTER_NEAREST) {
- samp_st.UAddressMinFilterRoundingEnable = true;
- samp_st.VAddressMinFilterRoundingEnable = true;
- samp_st.RAddressMinFilterRoundingEnable = true;
- }
-
- if (samp_st.MagModeFilter != MAPFILTER_NEAREST) {
- samp_st.UAddressMagFilterRoundingEnable = true;
- samp_st.VAddressMagFilterRoundingEnable = true;
- samp_st.RAddressMagFilterRoundingEnable = true;
- }
-
- bool either_nearest =
- sampler->Attrib.MinFilter == GL_NEAREST || sampler->Attrib.MagFilter == GL_NEAREST;
- unsigned wrap_s = translate_wrap_mode(sampler->Attrib.WrapS, either_nearest);
- unsigned wrap_t = translate_wrap_mode(sampler->Attrib.WrapT, either_nearest);
- unsigned wrap_r = translate_wrap_mode(sampler->Attrib.WrapR, either_nearest);
-
- if (target == GL_TEXTURE_CUBE_MAP ||
- target == GL_TEXTURE_CUBE_MAP_ARRAY) {
- /* Cube maps must use the same wrap mode for all three coordinate
- * dimensions. Prior to Haswell, only CUBE and CLAMP are valid.
- *
- * Ivybridge and Baytrail seem to have problems with CUBE mode and
- * integer formats. Fall back to CLAMP for now.
- */
- if ((tex_cube_map_seamless || sampler->Attrib.CubeMapSeamless) &&
- !(GFX_VERx10 == 70 && texObj->_IsIntegerFormat)) {
- wrap_s = TCM_CUBE;
- wrap_t = TCM_CUBE;
- wrap_r = TCM_CUBE;
- } else {
- wrap_s = TCM_CLAMP;
- wrap_t = TCM_CLAMP;
- wrap_r = TCM_CLAMP;
- }
- } else if (target == GL_TEXTURE_1D) {
- /* There's a bug in 1D texture sampling - it actually pays
- * attention to the wrap_t value, though it should not.
- * Override the wrap_t value here to GL_REPEAT to keep
- * any nonexistent border pixels from floating in.
- */
- wrap_t = TCM_WRAP;
- }
-
- samp_st.TCXAddressControlMode = wrap_s;
- samp_st.TCYAddressControlMode = wrap_t;
- samp_st.TCZAddressControlMode = wrap_r;
-
- samp_st.ShadowFunction =
- sampler->Attrib.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB ?
- brw_translate_shadow_compare_func(sampler->Attrib.CompareFunc) : 0;
-
-#if GFX_VER >= 7
- /* Select the anisotropic filtering algorithm. */
- samp_st.AnisotropicAlgorithm =
- samp_st.MinModeFilter == MAPFILTER_ANISOTROPIC ?
- EWAApproximation : LEGACY;
-#endif
-
-#if GFX_VER >= 6
- samp_st.NonnormalizedCoordinateEnable = target == GL_TEXTURE_RECTANGLE;
-#endif
-
- const float hw_max_lod = GFX_VER >= 7 ? 14 : 13;
- samp_st.MinLOD = CLAMP(sampler->Attrib.MinLod, 0, hw_max_lod);
- samp_st.MaxLOD = CLAMP(sampler->Attrib.MaxLod, 0, hw_max_lod);
- samp_st.TextureLODBias =
- CLAMP(tex_unit_lod_bias + sampler->Attrib.LodBias, -16, 15);
-
-#if GFX_VER == 6
- samp_st.BaseMipLevel =
- CLAMP(texObj->Attrib.MinLevel + texObj->Attrib.BaseLevel, 0, hw_max_lod);
- samp_st.MinandMagStateNotEqual =
- samp_st.MinModeFilter != samp_st.MagModeFilter;
-#endif
-
- /* Upload the border color if necessary. If not, just point it at
- * offset 0 (the start of the batch) - the color should be ignored,
- * but that address won't fault in case something reads it anyway.
- */
- uint32_t border_color_offset = 0;
- if (wrap_mode_needs_border_color(wrap_s) ||
- wrap_mode_needs_border_color(wrap_t) ||
- wrap_mode_needs_border_color(wrap_r)) {
- genX(upload_default_color)(brw, sampler, format, base_format,
- texObj->_IsIntegerFormat,
- texObj->StencilSampling,
- &border_color_offset);
- }
-#if GFX_VER < 6
- samp_st.BorderColorPointer =
- ro_bo(brw->batch.state.bo, border_color_offset);
-#else
- samp_st.BorderColorPointer = border_color_offset;
-#endif
-
-#if GFX_VER >= 8
- samp_st.LODPreClampMode = CLAMP_MODE_OGL;
-#else
- samp_st.LODPreClampEnable = true;
-#endif
-
- GENX(SAMPLER_STATE_pack)(brw, sampler_state, &samp_st);
-}
-
-static void
-update_sampler_state(struct brw_context *brw,
- int unit,
- uint32_t *sampler_state)
-{
- struct gl_context *ctx = &brw->ctx;
- const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- const struct gl_texture_object *texObj = texUnit->_Current;
- const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
-
- /* These don't use samplers at all. */
- if (texObj->Target == GL_TEXTURE_BUFFER)
- return;
-
- struct gl_texture_image *firstImage = texObj->Image[0][texObj->Attrib.BaseLevel];
- genX(update_sampler_state)(brw, texObj->Target,
- ctx->Texture.CubeMapSeamless,
- texUnit->LodBias,
- firstImage->TexFormat, firstImage->_BaseFormat,
- texObj, sampler,
- sampler_state);
-}
-
-static void
-genX(upload_sampler_state_table)(struct brw_context *brw,
- struct gl_program *prog,
- struct brw_stage_state *stage_state)
-{
- struct gl_context *ctx = &brw->ctx;
- uint32_t sampler_count = stage_state->sampler_count;
-
- GLbitfield SamplersUsed = prog->SamplersUsed;
-
- if (sampler_count == 0)
- return;
-
- /* SAMPLER_STATE is 4 DWords on all platforms. */
- const int dwords = GENX(SAMPLER_STATE_length);
- const int size_in_bytes = dwords * sizeof(uint32_t);
-
- uint32_t *sampler_state = brw_state_batch(brw,
- sampler_count * size_in_bytes,
- 32, &stage_state->sampler_offset);
- /* memset(sampler_state, 0, sampler_count * size_in_bytes); */
-
- for (unsigned s = 0; s < sampler_count; s++) {
- if (SamplersUsed & (1 << s)) {
- const unsigned unit = prog->SamplerUnits[s];
- if (ctx->Texture.Unit[unit]._Current) {
- update_sampler_state(brw, unit, sampler_state);
- }
- }
-
- sampler_state += dwords;
- }
-
- if (GFX_VER >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) {
- /* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */
- genX(emit_sampler_state_pointers_xs)(brw, stage_state);
- } else {
- /* Flag that the sampler state table pointer has changed; later atoms
- * will handle it.
- */
- brw->ctx.NewDriverState |= BRW_NEW_SAMPLER_STATE_TABLE;
- }
-}
-
-static void
-genX(upload_fs_samplers)(struct brw_context *brw)
-{
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];
- genX(upload_sampler_state_table)(brw, fs, &brw->wm.base);
-}
-
-static const struct brw_tracked_state genX(fs_samplers) = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_FRAGMENT_PROGRAM,
- },
- .emit = genX(upload_fs_samplers),
-};
-
-static void
-genX(upload_vs_samplers)(struct brw_context *brw)
-{
- /* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];
- genX(upload_sampler_state_table)(brw, vs, &brw->vs.base);
-}
-
-static const struct brw_tracked_state genX(vs_samplers) = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VERTEX_PROGRAM,
- },
- .emit = genX(upload_vs_samplers),
-};
-
-#if GFX_VER >= 6
-static void
-genX(upload_gs_samplers)(struct brw_context *brw)
-{
- /* BRW_NEW_GEOMETRY_PROGRAM */
- struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];
- if (!gs)
- return;
-
- genX(upload_sampler_state_table)(brw, gs, &brw->gs.base);
-}
-
-
-static const struct brw_tracked_state genX(gs_samplers) = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_GEOMETRY_PROGRAM,
- },
- .emit = genX(upload_gs_samplers),
-};
-#endif
-
-#if GFX_VER >= 7
-static void
-genX(upload_tcs_samplers)(struct brw_context *brw)
-{
- /* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
- if (!tcs)
- return;
-
- genX(upload_sampler_state_table)(brw, tcs, &brw->tcs.base);
-}
-
-static const struct brw_tracked_state genX(tcs_samplers) = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = genX(upload_tcs_samplers),
-};
-#endif
-
-#if GFX_VER >= 7
-static void
-genX(upload_tes_samplers)(struct brw_context *brw)
-{
- /* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];
- if (!tes)
- return;
-
- genX(upload_sampler_state_table)(brw, tes, &brw->tes.base);
-}
-
-static const struct brw_tracked_state genX(tes_samplers) = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = genX(upload_tes_samplers),
-};
-#endif
-
-#if GFX_VER >= 7
-static void
-genX(upload_cs_samplers)(struct brw_context *brw)
-{
- /* BRW_NEW_COMPUTE_PROGRAM */
- struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];
- if (!cs)
- return;
-
- genX(upload_sampler_state_table)(brw, cs, &brw->cs.base);
-}
-
-const struct brw_tracked_state genX(cs_samplers) = {
- .dirty = {
- .mesa = _NEW_TEXTURE,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_COMPUTE_PROGRAM,
- },
- .emit = genX(upload_cs_samplers),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER <= 5
-
-static void genX(upload_blend_constant_color)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
-
- brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {
- blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
- blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
- blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
- blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
- }
-}
-
-static const struct brw_tracked_state genX(blend_constant_color) = {
- .dirty = {
- .mesa = _NEW_COLOR,
- .brw = BRW_NEW_CONTEXT |
- BRW_NEW_BLORP,
- },
- .emit = genX(upload_blend_constant_color)
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-void
-genX(init_atoms)(struct brw_context *brw)
-{
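- /* Each atom list below is walked in order on every draw; an atom's .emit
-  * hook only runs when one of its .dirty mesa/brw bits is flagged, so the
-  * ordering here encodes the dependencies called out in the comments.
-  */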
-#if GFX_VER < 6
- static const struct brw_tracked_state *render_atoms[] =
- {
- &genX(vf_statistics),
-
- /* Once all the programs are done, we know how large urb entry
- * sizes need to be and can decide if we need to change the urb
- * layout.
- */
- &brw_curbe_offsets,
- &brw_recalculate_urb_fence,
-
- &genX(cc_vp),
- &genX(color_calc_state),
-
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
- */
- &brw_vs_pull_constants,
- &brw_wm_pull_constants,
- &brw_renderbuffer_surfaces,
- &brw_renderbuffer_read_surfaces,
- &brw_texture_surfaces,
- &brw_vs_binding_table,
- &brw_wm_binding_table,
-
- &genX(fs_samplers),
- &genX(vs_samplers),
-
- /* These set up state for brw_psp_urb_cbs */
- &genX(wm_state),
- &genX(sf_clip_viewport),
- &genX(sf_state),
- &genX(vs_state), /* always required, enabled or not */
- &genX(clip_state),
- &genX(gs_state),
-
- /* Command packets:
- */
- &brw_binding_table_pointers,
- &genX(blend_constant_color),
-
- &brw_depthbuffer,
-
- &genX(polygon_stipple),
- &genX(polygon_stipple_offset),
-
- &genX(line_stipple),
-
- &brw_psp_urb_cbs,
-
- &genX(drawing_rect),
- &brw_indices, /* must come before brw_vertices */
- &genX(index_buffer),
- &genX(vertices),
-
- &brw_constant_buffer
- };
-#elif GFX_VER == 6
- static const struct brw_tracked_state *render_atoms[] =
- {
- &genX(vf_statistics),
-
- &genX(sf_clip_viewport),
-
- /* Command packets: */
-
- &genX(cc_vp),
-
- &gfx6_urb,
- &genX(blend_state), /* must do before cc unit */
- &genX(color_calc_state), /* must do before cc unit */
- &genX(depth_stencil_state), /* must do before cc unit */
-
- &genX(vs_push_constants), /* Before vs_state */
- &genX(gs_push_constants), /* Before gs_state */
- &genX(wm_push_constants), /* Before wm_state */
-
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
- */
- &brw_vs_pull_constants,
- &brw_vs_ubo_surfaces,
- &brw_gs_pull_constants,
- &brw_gs_ubo_surfaces,
- &brw_wm_pull_constants,
- &brw_wm_ubo_surfaces,
- &gfx6_renderbuffer_surfaces,
- &brw_renderbuffer_read_surfaces,
- &brw_texture_surfaces,
- &gfx6_sol_surface,
- &brw_vs_binding_table,
- &gfx6_gs_binding_table,
- &brw_wm_binding_table,
-
- &genX(fs_samplers),
- &genX(vs_samplers),
- &genX(gs_samplers),
- &gfx6_sampler_state,
- &genX(multisample_state),
-
- &genX(vs_state),
- &genX(gs_state),
- &genX(clip_state),
- &genX(sf_state),
- &genX(wm_state),
-
- &genX(scissor_state),
-
- &gfx6_binding_table_pointers,
-
- &brw_depthbuffer,
-
- &genX(polygon_stipple),
- &genX(polygon_stipple_offset),
-
- &genX(line_stipple),
-
- &genX(drawing_rect),
-
- &brw_indices, /* must come before brw_vertices */
- &genX(index_buffer),
- &genX(vertices),
- };
-#elif GFX_VER == 7
- static const struct brw_tracked_state *render_atoms[] =
- {
- &genX(vf_statistics),
-
- /* Command packets: */
-
- &genX(cc_vp),
- &genX(sf_clip_viewport),
-
- &gfx7_l3_state,
- &gfx7_push_constant_space,
- &gfx7_urb,
-#if GFX_VERx10 == 75
- &genX(cc_and_blend_state),
-#else
- &genX(blend_state), /* must do before cc unit */
- &genX(color_calc_state), /* must do before cc unit */
-#endif
- &genX(depth_stencil_state), /* must do before cc unit */
-
- &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
- &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
- &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
- &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
- &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
-
- &genX(vs_push_constants), /* Before vs_state */
- &genX(tcs_push_constants),
- &genX(tes_push_constants),
- &genX(gs_push_constants), /* Before gs_state */
- &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */
-
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
- */
- &brw_vs_pull_constants,
- &brw_vs_ubo_surfaces,
- &brw_tcs_pull_constants,
- &brw_tcs_ubo_surfaces,
- &brw_tes_pull_constants,
- &brw_tes_ubo_surfaces,
- &brw_gs_pull_constants,
- &brw_gs_ubo_surfaces,
- &brw_wm_pull_constants,
- &brw_wm_ubo_surfaces,
- &gfx6_renderbuffer_surfaces,
- &brw_renderbuffer_read_surfaces,
- &brw_texture_surfaces,
-
- &genX(push_constant_packets),
-
- &brw_vs_binding_table,
- &brw_tcs_binding_table,
- &brw_tes_binding_table,
- &brw_gs_binding_table,
- &brw_wm_binding_table,
-
- &genX(fs_samplers),
- &genX(vs_samplers),
- &genX(tcs_samplers),
- &genX(tes_samplers),
- &genX(gs_samplers),
- &genX(multisample_state),
-
- &genX(vs_state),
- &genX(hs_state),
- &genX(te_state),
- &genX(ds_state),
- &genX(gs_state),
- &genX(sol_state),
- &genX(clip_state),
- &genX(sbe_state),
- &genX(sf_state),
- &genX(wm_state),
- &genX(ps_state),
-
- &genX(scissor_state),
-
- &brw_depthbuffer,
-
- &genX(polygon_stipple),
- &genX(polygon_stipple_offset),
-
- &genX(line_stipple),
-
- &genX(drawing_rect),
-
- &brw_indices, /* must come before brw_vertices */
- &genX(index_buffer),
- &genX(vertices),
-
-#if GFX_VERx10 == 75
- &genX(cut_index),
-#endif
- };
-#elif GFX_VER >= 8
- static const struct brw_tracked_state *render_atoms[] =
- {
- &genX(vf_statistics),
-
- &genX(cc_vp),
- &genX(sf_clip_viewport),
-
- &gfx7_l3_state,
- &gfx7_push_constant_space,
- &gfx7_urb,
- &genX(blend_state),
- &genX(color_calc_state),
-
- &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
- &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
- &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
- &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
- &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
-
- &genX(vs_push_constants), /* Before vs_state */
- &genX(tcs_push_constants),
- &genX(tes_push_constants),
- &genX(gs_push_constants), /* Before gs_state */
- &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */
-
- /* Surface state setup. Must come before the VS/WM unit. The binding
- * table upload must be last.
- */
- &brw_vs_pull_constants,
- &brw_vs_ubo_surfaces,
- &brw_tcs_pull_constants,
- &brw_tcs_ubo_surfaces,
- &brw_tes_pull_constants,
- &brw_tes_ubo_surfaces,
- &brw_gs_pull_constants,
- &brw_gs_ubo_surfaces,
- &brw_wm_pull_constants,
- &brw_wm_ubo_surfaces,
- &gfx6_renderbuffer_surfaces,
- &brw_renderbuffer_read_surfaces,
- &brw_texture_surfaces,
-
- &genX(push_constant_packets),
-
- &brw_vs_binding_table,
- &brw_tcs_binding_table,
- &brw_tes_binding_table,
- &brw_gs_binding_table,
- &brw_wm_binding_table,
-
- &genX(fs_samplers),
- &genX(vs_samplers),
- &genX(tcs_samplers),
- &genX(tes_samplers),
- &genX(gs_samplers),
- &genX(multisample_state),
-
- &genX(vs_state),
- &genX(hs_state),
- &genX(te_state),
- &genX(ds_state),
- &genX(gs_state),
- &genX(sol_state),
- &genX(clip_state),
- &genX(raster_state),
- &genX(sbe_state),
- &genX(sf_state),
- &genX(ps_blend),
- &genX(ps_extra),
- &genX(ps_state),
- &genX(depth_stencil_state),
- &genX(wm_state),
-
- &genX(scissor_state),
-
- &brw_depthbuffer,
-
- &genX(polygon_stipple),
- &genX(polygon_stipple_offset),
-
- &genX(line_stipple),
-
- &genX(drawing_rect),
-
- &genX(vf_topology),
-
- &brw_indices,
- &genX(index_buffer),
- &genX(vertices),
-
- &genX(cut_index),
- &gfx8_pma_fix,
- };
-#endif
-
- STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms));
- brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
- render_atoms, ARRAY_SIZE(render_atoms));
-
-#if GFX_VER >= 7
- static const struct brw_tracked_state *compute_atoms[] =
- {
- &gfx7_l3_state,
- &brw_cs_image_surfaces,
- &genX(cs_push_constants),
- &genX(cs_pull_constants),
- &brw_cs_ubo_surfaces,
- &brw_cs_texture_surfaces,
- &brw_cs_work_groups_surface,
- &genX(cs_samplers),
- &genX(cs_state),
- };
-
- STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
- brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
- compute_atoms, ARRAY_SIZE(compute_atoms));
-
- brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count);
- brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker);
-#endif
-
- brw->vtbl.emit_state_base_address = genX(emit_state_base_address);
-
- assert(brw->screen->devinfo.verx10 == GFX_VERx10);
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-static inline struct blorp_address
-dynamic_state_address(struct blorp_batch *batch, uint32_t offset)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- return (struct blorp_address) {
- .buffer = brw->batch.state.bo,
- .offset = offset,
- };
-}
-
-static inline struct blorp_address
-instruction_state_address(struct blorp_batch *batch, uint32_t offset)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- return (struct blorp_address) {
- .buffer = brw->cache.bo,
- .offset = offset,
- };
-}
-
-static struct blorp_address
-blorp_emit_vs_state(struct blorp_batch *batch)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- uint32_t offset;
- blorp_emit_dynamic(batch, GENX(VS_STATE), vs, 64, &offset) {
- vs.Enable = false;
- vs.URBEntryAllocationSize = brw->urb.vsize - 1;
-#if GFX_VER == 5
- vs.NumberofURBEntries = brw->urb.nr_vs_entries >> 2;
-#else
- vs.NumberofURBEntries = brw->urb.nr_vs_entries;
-#endif
- }
-
- return dynamic_state_address(batch, offset);
-}
-
-static struct blorp_address
-blorp_emit_sf_state(struct blorp_batch *batch,
- const struct blorp_params *params)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
- const struct brw_sf_prog_data *prog_data = params->sf_prog_data;
-
- uint32_t offset;
- blorp_emit_dynamic(batch, GENX(SF_STATE), sf, 64, &offset) {
-#if GFX_VER == 4
- sf.KernelStartPointer =
- instruction_state_address(batch, params->sf_prog_kernel);
-#else
- sf.KernelStartPointer = params->sf_prog_kernel;
-#endif
- sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1;
- sf.VertexURBEntryReadLength = prog_data->urb_read_length;
- sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
- sf.DispatchGRFStartRegisterForURBData = 3;
-
- sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
- sf.NumberofURBEntries = brw->urb.nr_sf_entries;
-
-#if GFX_VER == 5
- sf.MaximumNumberofThreads = MIN2(48, brw->urb.nr_sf_entries) - 1;
-#else
- sf.MaximumNumberofThreads = MIN2(24, brw->urb.nr_sf_entries) - 1;
-#endif
-
- sf.ViewportTransformEnable = false;
-
- sf.CullMode = CULLMODE_NONE;
- }
-
- return dynamic_state_address(batch, offset);
-}
-
-static struct blorp_address
-blorp_emit_wm_state(struct blorp_batch *batch,
- const struct blorp_params *params)
-{
- const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
-
- uint32_t offset;
- blorp_emit_dynamic(batch, GENX(WM_STATE), wm, 64, &offset) {
- if (params->src.enabled) {
- /* Iron Lake can't do sampler prefetch */
- wm.SamplerCount = (GFX_VER != 5);
- wm.BindingTableEntryCount = 2;
- uint32_t sampler = blorp_emit_sampler_state(batch);
- wm.SamplerStatePointer = dynamic_state_address(batch, sampler);
- }
-
- if (prog_data) {
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
- wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2;
- wm.SetupURBEntryReadOffset = 0;
-
- wm.DepthCoefficientURBReadOffset = 1;
- wm.PixelShaderKillsPixel = prog_data->uses_kill;
- wm.ThreadDispatchEnable = true;
- wm.EarlyDepthTestEnable = true;
-
- wm._8PixelDispatchEnable = prog_data->dispatch_8;
- wm._16PixelDispatchEnable = prog_data->dispatch_16;
- wm._32PixelDispatchEnable = prog_data->dispatch_32;
-
-#if GFX_VER == 4
- wm.KernelStartPointer0 =
- instruction_state_address(batch, params->wm_prog_kernel);
- wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
-#else
- wm.KernelStartPointer0 = params->wm_prog_kernel +
- brw_wm_prog_data_prog_offset(prog_data, wm, 0);
- wm.KernelStartPointer1 = params->wm_prog_kernel +
- brw_wm_prog_data_prog_offset(prog_data, wm, 1);
- wm.KernelStartPointer2 = params->wm_prog_kernel +
- brw_wm_prog_data_prog_offset(prog_data, wm, 2);
- wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
- wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
- wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
-#endif
- }
-
- wm.MaximumNumberofThreads =
- batch->blorp->compiler->devinfo->max_wm_threads - 1;
- }
-
- return dynamic_state_address(batch, offset);
-}
-
-static struct blorp_address
-blorp_emit_color_calc_state(struct blorp_batch *batch)
-{
- uint32_t cc_viewport = blorp_emit_cc_viewport(batch);
-
- uint32_t offset;
- blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
- cc.CCViewportStatePointer = dynamic_state_address(batch, cc_viewport);
- }
-
- return dynamic_state_address(batch, offset);
-}
-
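- /* On Gfx4-5 the fixed-function stages are programmed through indirect
-  * state structures (VS_STATE, SF_STATE, WM_STATE, COLOR_CALC_STATE) in
-  * dynamic state memory; 3DSTATE_PIPELINED_POINTERS below just points the
-  * hardware at the structures emitted by the helpers above.
-  */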
-static void
-blorp_emit_pipeline(struct blorp_batch *batch,
- const struct blorp_params *params)
-{
- assert(batch->blorp->driver_ctx == batch->driver_batch);
- struct brw_context *brw = batch->driver_batch;
-
- emit_urb_config(batch, params, NULL);
-
- blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
- pp.PointertoVSState = blorp_emit_vs_state(batch);
- pp.GSEnable = false;
- pp.ClipEnable = false;
- pp.PointertoSFState = blorp_emit_sf_state(batch, params);
- pp.PointertoWMState = blorp_emit_wm_state(batch, params);
- pp.PointertoColorCalcState = blorp_emit_color_calc_state(batch);
- }
-
- brw_upload_urb_fence(brw);
-
- blorp_emit(batch, GENX(CS_URB_STATE), curb);
- blorp_emit(batch, GENX(CONSTANT_BUFFER), curb);
-}
+++ /dev/null
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-#include "brw_util.h"
-#include "brw_batch.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-
-bool
-brw_is_drawing_points(const struct brw_context *brw)
-{
- /* Determine if the primitives *reaching the SF* are points */
- /* _NEW_POLYGON */
- if (brw->ctx.Polygon.FrontMode == GL_POINT ||
- brw->ctx.Polygon.BackMode == GL_POINT) {
- return true;
- }
-
- if (brw->gs.base.prog_data) {
- /* BRW_NEW_GS_PROG_DATA */
- return brw_gs_prog_data(brw->gs.base.prog_data)->output_topology ==
- _3DPRIM_POINTLIST;
- } else if (brw->tes.base.prog_data) {
- /* BRW_NEW_TES_PROG_DATA */
- return brw_tes_prog_data(brw->tes.base.prog_data)->output_topology ==
- BRW_TESS_OUTPUT_TOPOLOGY_POINT;
- } else {
- /* BRW_NEW_PRIMITIVE */
- return brw->primitive == _3DPRIM_POINTLIST;
- }
-}
-
-bool
-brw_is_drawing_lines(const struct brw_context *brw)
-{
- /* Determine if the primitives *reaching the SF* are lines */
- /* _NEW_POLYGON */
- if (brw->ctx.Polygon.FrontMode == GL_LINE ||
- brw->ctx.Polygon.BackMode == GL_LINE) {
- return true;
- }
-
- if (brw->gs.base.prog_data) {
- /* BRW_NEW_GS_PROG_DATA */
- return brw_gs_prog_data(brw->gs.base.prog_data)->output_topology ==
- _3DPRIM_LINESTRIP;
- } else if (brw->tes.base.prog_data) {
- /* BRW_NEW_TES_PROG_DATA */
- return brw_tes_prog_data(brw->tes.base.prog_data)->output_topology ==
- BRW_TESS_OUTPUT_TOPOLOGY_LINE;
- } else {
- /* BRW_NEW_PRIMITIVE */
- switch (brw->primitive) {
- case _3DPRIM_LINELIST:
- case _3DPRIM_LINESTRIP:
- case _3DPRIM_LINELOOP:
- return true;
- }
- }
- return false;
-}
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_cs.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_program.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-static uint32_t
-f_as_u32(float f)
-{
- union fi fi = { .f = f };
- return fi.ui;
-}
-
-static uint32_t
-brw_param_value(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_state *stage_state,
- uint32_t param)
-{
- struct gl_context *ctx = &brw->ctx;
-
- switch (BRW_PARAM_DOMAIN(param)) {
- case BRW_PARAM_DOMAIN_BUILTIN:
- if (param == BRW_PARAM_BUILTIN_ZERO) {
- return 0;
- } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) {
- gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
- unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param);
- unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param);
- return ((uint32_t *)clip_planes[idx])[comp];
- } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
- param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
- unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
- return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]);
- } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
- return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]);
- } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
- return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]);
- } else if (param >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
- param <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
- unsigned i = param - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
- return brw->compute.group_size[i];
- } else {
- unreachable("Invalid param builtin");
- }
-
- case BRW_PARAM_DOMAIN_PARAMETER: {
- unsigned idx = BRW_PARAM_PARAMETER_IDX(param);
- unsigned offset = prog->Parameters->Parameters[idx].ValueOffset;
- unsigned comp = BRW_PARAM_PARAMETER_COMP(param);
- assert(idx < prog->Parameters->NumParameters);
- return prog->Parameters->ParameterValues[offset + comp].u;
- }
-
- case BRW_PARAM_DOMAIN_UNIFORM: {
- unsigned idx = BRW_PARAM_UNIFORM_IDX(param);
- assert(idx < prog->sh.data->NumUniformDataSlots);
- return prog->sh.data->UniformDataSlots[idx].u;
- }
-
- case BRW_PARAM_DOMAIN_IMAGE: {
- unsigned idx = BRW_PARAM_IMAGE_IDX(param);
- unsigned offset = BRW_PARAM_IMAGE_OFFSET(param);
- assert(offset < ARRAY_SIZE(stage_state->image_param));
- return ((uint32_t *)&stage_state->image_param[idx])[offset];
- }
-
- default:
- unreachable("Invalid param domain");
- }
-}
-
-
-void
-brw_populate_constant_data(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_state *stage_state,
- void *void_dst,
- const uint32_t *param,
- unsigned nr_params)
-{
- uint32_t *dst = void_dst;
- for (unsigned i = 0; i < nr_params; i++)
- dst[i] = brw_param_value(brw, prog, stage_state, param[i]);
-}
-
-
-/**
- * Creates a streamed BO containing the push constants for the VS or GS on
- * gfx6+.
- *
- * Push constants are constant values (such as GLSL uniforms) that are
- * pre-loaded into a shader stage's register space at thread spawn time.
- *
- * Not all GLSL uniforms will be uploaded as push constants: The hardware has
- * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
- * uploaded as push constants, while GL 4.4 requires at least 1024 components
- * to be usable for the VS. Plus, currently we always use pull constants
- * instead of push constants when doing variable-index array access.
- *
- * See brw_curbe.c for the equivalent gfx4/5 code.
- */
-void
-gfx6_upload_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
-
- bool active = prog_data &&
- (stage_state->stage != MESA_SHADER_TESS_CTRL ||
- brw->programs[MESA_SHADER_TESS_EVAL]);
-
- if (active)
- _mesa_shader_write_subroutine_indices(ctx, stage_state->stage);
-
- if (!active || prog_data->nr_params == 0) {
- stage_state->push_const_size = 0;
- } else {
- /* Updates the ParameterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- /* XXX: Should this happen somewhere before to get our state flag set? */
- if (prog)
- _mesa_load_state_parameters(ctx, prog->Parameters);
-
- int i;
- const int size = prog_data->nr_params * sizeof(gl_constant_value);
- gl_constant_value *param;
- if (devinfo->verx10 >= 75) {
- param = brw_upload_space(&brw->upload, size, 32,
- &stage_state->push_const_bo,
- &stage_state->push_const_offset);
- } else {
- param = brw_state_batch(brw, size, 32,
- &stage_state->push_const_offset);
- }
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- /* _NEW_PROGRAM_CONSTANTS
- *
- * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
- * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
- * wouldn't be set for them.
- */
- brw_populate_constant_data(brw, prog, stage_state, param,
- prog_data->param,
- prog_data->nr_params);
-
- if (0) {
- fprintf(stderr, "%s constants:\n",
- _mesa_shader_stage_to_string(stage_state->stage));
- for (i = 0; i < prog_data->nr_params; i++) {
- if ((i & 7) == 0)
- fprintf(stderr, "g%d: ",
- prog_data->dispatch_grf_start_reg + i / 8);
- fprintf(stderr, "%8f ", param[i].f);
- if ((i & 7) == 7)
- fprintf(stderr, "\n");
- }
- if ((i & 7) != 0)
- fprintf(stderr, "\n");
- fprintf(stderr, "\n");
- }
-
- stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
- /* We can only push 32 registers of constants at a time. */
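- /* Each register holds 8 DWords, so, for example, a (hypothetical) program
-  * with 100 scalar params needs ALIGN(100, 8) / 8 = 13 registers, well
-  * under the limits quoted below.
-  */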
-
- /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
- *
- * "The sum of all four read length fields (each incremented to
- * represent the actual read length) must be less than or equal to
- * 32"
- *
- * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
- *
- * "The sum of all four read length fields must be less than or
- * equal to the size of 64"
- *
- * The other shader stages all match the VS's limits.
- */
- assert(stage_state->push_const_size <= 32);
- }
-
- stage_state->push_constants_dirty = true;
-}
-
-
-/**
- * Creates a temporary BO containing the pull constant data for the shader
- * stage, and the SURFACE_STATE struct that points at it.
- *
- * Pull constants are GLSL uniforms (and other constant data) beyond what we
- * could fit as push constants, or that have variable-index array access
- * (which is easiest to support using pull constants, and avoids filling
- * register space with mostly-unused data).
- *
- * Compare this path to brw_curbe.c for gfx4/5 push constants, and
- * gfx6_vs_state.c for gfx6+ push constants.
- */
-void
-brw_upload_pull_constants(struct brw_context *brw,
- GLbitfield64 brw_new_constbuf,
- const struct gl_program *prog,
- struct brw_stage_state *stage_state,
- const struct brw_stage_prog_data *prog_data)
-{
- unsigned i;
- uint32_t surf_index = prog_data->binding_table.pull_constants_start;
-
- if (!prog_data->nr_pull_params) {
- if (stage_state->surf_offset[surf_index]) {
- stage_state->surf_offset[surf_index] = 0;
- brw->ctx.NewDriverState |= brw_new_constbuf;
- }
- return;
- }
-
- /* Updates the ParameterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- _mesa_load_state_parameters(&brw->ctx, prog->Parameters);
-
- /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */
- uint32_t size = prog_data->nr_pull_params * 4;
- struct brw_bo *const_bo = NULL;
- uint32_t const_offset;
- gl_constant_value *constants = brw_upload_space(&brw->upload, size, 64,
- &const_bo, &const_offset);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- brw_populate_constant_data(brw, prog, stage_state, constants,
- prog_data->pull_param,
- prog_data->nr_pull_params);
-
- if (0) {
- for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
- const gl_constant_value *row = &constants[i * 4];
- fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
- i, row[0].f, row[1].f, row[2].f, row[3].f);
- }
- }
-
- brw_emit_buffer_surface_state(brw, &stage_state->surf_offset[surf_index],
- const_bo, const_offset,
- ISL_FORMAT_R32G32B32A32_FLOAT,
- size, 1, 0);
-
- brw_bo_unreference(const_bo);
-
- brw->ctx.NewDriverState |= brw_new_constbuf;
-}
-
-/**
- * Creates a region containing the push constants for the CS on gfx7+.
- *
- * Push constants are constant values (such as GLSL uniforms) that are
- * pre-loaded into a shader stage's register space at thread spawn time.
- *
- * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
- * equivalent gfx4/5 code and gfx6_vs_state.c:gfx6_upload_push_constants for
- * gfx6+.
- */
-void
-brw_upload_cs_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_cs_prog_data *cs_prog_data,
- struct brw_stage_state *stage_state)
-{
- struct gl_context *ctx = &brw->ctx;
- const struct brw_stage_prog_data *prog_data =
- (struct brw_stage_prog_data*) cs_prog_data;
-
- /* Updates the ParameterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- /* XXX: Should this happen somewhere before to get our state flag set? */
- _mesa_load_state_parameters(ctx, prog->Parameters);
-
- const struct brw_cs_dispatch_info dispatch =
- brw_cs_get_dispatch_info(&brw->screen->devinfo, cs_prog_data,
- brw->compute.group_size);
- const unsigned push_const_size =
- brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
-
- if (push_const_size == 0) {
- stage_state->push_const_size = 0;
- return;
- }
-
-
- uint32_t *param =
- brw_state_batch(brw, ALIGN(push_const_size, 64),
- 64, &stage_state->push_const_offset);
- assert(param);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
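- /* The buffer is laid out as the cross-thread constants followed by one
-  * per-thread block per hardware thread. For example (hypothetical sizes),
-  * with cross_thread.regs == 1, per_thread.regs == 1 and 4 threads, DWords
-  * 0-7 are shared and DWords 8-15, 16-23, 24-31 and 32-39 each hold one
-  * thread's copy, with BRW_PARAM_BUILTIN_SUBGROUP_ID replaced by the thread
-  * index below.
-  */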
- if (cs_prog_data->push.cross_thread.size > 0) {
- uint32_t *param_copy = param;
- for (unsigned i = 0;
- i < cs_prog_data->push.cross_thread.dwords;
- i++) {
- assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
- param_copy[i] = brw_param_value(brw, prog, stage_state,
- prog_data->param[i]);
- }
- }
-
- if (cs_prog_data->push.per_thread.size > 0) {
- for (unsigned t = 0; t < dispatch.threads; t++) {
- unsigned dst =
- 8 * (cs_prog_data->push.per_thread.regs * t +
- cs_prog_data->push.cross_thread.regs);
- unsigned src = cs_prog_data->push.cross_thread.dwords;
- for ( ; src < prog_data->nr_params; src++, dst++) {
- if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
- param[dst] = t;
- } else {
- param[dst] = brw_param_value(brw, prog, stage_state,
- prog_data->param[src]);
- }
- }
- }
- }
-
- stage_state->push_const_size =
- cs_prog_data->push.cross_thread.regs +
- cs_prog_data->push.per_thread.regs;
-}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_multisample_state.h"
-#include "main/framebuffer.h"
-
-void
-gfx6_get_sample_position(struct gl_context *ctx,
- struct gl_framebuffer *fb,
- GLuint index, GLfloat *result)
-{
- uint8_t bits;
-
- switch (_mesa_geometric_samples(fb)) {
- case 1:
- result[0] = result[1] = 0.5f;
- return;
- case 2:
- bits = brw_multisample_positions_1x_2x >> (8 * index);
- break;
- case 4:
- bits = brw_multisample_positions_4x >> (8 * index);
- break;
- case 8:
- bits = brw_multisample_positions_8x[index >> 2] >> (8 * (index & 3));
- break;
- case 16:
- bits = brw_multisample_positions_16x[index >> 2] >> (8 * (index & 3));
- break;
- default:
- unreachable("Not implemented");
- }
-
- /* Convert from U0.4 back to a floating point coordinate. */
- result[0] = ((bits >> 4) & 0xf) / 16.0f;
- result[1] = (bits & 0xf) / 16.0f;
-}
+++ /dev/null
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- * Kenneth Graunke <kenneth@whitecape.org>
- */
-
-/** @file gfx6_queryobj.c
- *
- * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
- * GL_EXT_transform_feedback, and friends) on platforms that support
- * hardware contexts (Gfx6+).
- */
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "perf/intel_perf_regs.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-
-static inline void
-set_query_availability(struct brw_context *brw, struct brw_query_object *query,
- bool available)
-{
- /* For platforms that support ARB_query_buffer_object, we write the
- * query availability for "pipelined" queries.
- *
- * Most counter snapshots are written by the command streamer, by
- * doing a CS stall and then MI_STORE_REGISTER_MEM. For these
- * counters, the CS stall guarantees that the results will be
- * available when subsequent CS commands run. So we don't need to
- * do any additional tracking.
- *
- * Other counters (occlusion queries and timestamp) are written by
- * PIPE_CONTROL, without a CS stall. This means that we can't be
- * sure whether the writes have landed yet or not. Performing a
- * PIPE_CONTROL with an immediate write will synchronize with
- * those earlier writes, so we write 1 when the value has landed.
- */
- if (brw->ctx.Extensions.ARB_query_buffer_object &&
- brw_is_query_pipelined(query)) {
- unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
-
- if (available) {
- /* Order available *after* the query results. */
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
- } else {
- /* Make it unavailable *before* any pipelined reads. */
- flags |= PIPE_CONTROL_CS_STALL;
- }
-
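- /* For the pipelined queries this applies to, the two 64-bit begin/end
-  * snapshots live at the start of the BO, so the availability flag is
-  * written just past them at offset 2 * sizeof(uint64_t).
-  */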
- brw_emit_pipe_control_write(brw, flags,
- query->bo, 2 * sizeof(uint64_t),
- available);
- }
-}
-
-static void
-write_primitives_generated(struct brw_context *brw,
- struct brw_bo *query_bo, int stream, int idx)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- brw_emit_mi_flush(brw);
-
- if (devinfo->ver >= 7 && stream > 0) {
- brw_store_register_mem64(brw, query_bo,
- GFX7_SO_PRIM_STORAGE_NEEDED(stream),
- idx * sizeof(uint64_t));
- } else {
- brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT,
- idx * sizeof(uint64_t));
- }
-}
-
-static void
-write_xfb_primitives_written(struct brw_context *brw,
- struct brw_bo *bo, int stream, int idx)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- brw_emit_mi_flush(brw);
-
- if (devinfo->ver >= 7) {
- brw_store_register_mem64(brw, bo, GFX7_SO_NUM_PRIMS_WRITTEN(stream),
- idx * sizeof(uint64_t));
- } else {
- brw_store_register_mem64(brw, bo, GFX6_SO_NUM_PRIMS_WRITTEN,
- idx * sizeof(uint64_t));
- }
-}
-
-static void
-write_xfb_overflow_streams(struct gl_context *ctx,
- struct brw_bo *bo, int stream, int count,
- int idx)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- brw_emit_mi_flush(brw);
-
- for (int i = 0; i < count; i++) {
- int w_idx = 4 * i + idx;
- int g_idx = 4 * i + idx + 2;
-
- if (devinfo->ver >= 7) {
- brw_store_register_mem64(brw, bo,
- GFX7_SO_NUM_PRIMS_WRITTEN(stream + i),
- g_idx * sizeof(uint64_t));
- brw_store_register_mem64(brw, bo,
- GFX7_SO_PRIM_STORAGE_NEEDED(stream + i),
- w_idx * sizeof(uint64_t));
- } else {
- brw_store_register_mem64(brw, bo,
- GFX6_SO_NUM_PRIMS_WRITTEN,
- g_idx * sizeof(uint64_t));
- brw_store_register_mem64(brw, bo,
- GFX6_SO_PRIM_STORAGE_NEEDED,
- w_idx * sizeof(uint64_t));
- }
- }
-}
-
-static bool
-check_xfb_overflow_streams(uint64_t *results, int count)
-{
- bool overflow = false;
-
- for (int i = 0; i < count; i++) {
- uint64_t *result_i = &results[4 * i];
-
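- /* Each stream records four counters (see write_xfb_overflow_streams):
-  * storage-needed at begin/end in [0]/[1] and primitives-written at
-  * begin/end in [2]/[3]. If the written delta doesn't match the needed
-  * delta, some primitives overflowed the buffer.
-  */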
- if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
- overflow = true;
- break;
- }
- }
-
- return overflow;
-}
-
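- /* GL_GEOMETRY_SHADER_INVOCATIONS is not contiguous with the other pipeline
-  * statistics targets, so it is mapped to the last slot of the register
-  * table in emit_pipeline_stat().
-  */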
-static inline int
-pipeline_target_to_index(int target)
-{
- if (target == GL_GEOMETRY_SHADER_INVOCATIONS)
- return MAX_PIPELINE_STATISTICS - 1;
- else
- return target - GL_VERTICES_SUBMITTED_ARB;
-}
-
-static void
-emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo,
- int stream, int target, int idx)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* One source of confusion is the tessellation shader statistics. The
- * hardware has no statistics specific to the TE unit. Ideally we could have
- * the HS primitives for TESS_CONTROL_SHADER_PATCHES_ARB, and the DS
- * invocations as the register for TESS_EVALUATION_SHADER_INVOCATIONS_ARB.
- * Unfortunately we don't have HS primitives, only HS invocations.
- */
-
- /* Everything except GEOMETRY_SHADER_INVOCATIONS can be kept in a simple
- * lookup table
- */
- static const uint32_t target_to_register[] = {
- IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */
- IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */
- VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */
- HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */
- DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */
- GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */
- PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */
- CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */
- CL_INVOCATION_COUNT, /* CLIPPING_INPUT_PRIMITIVES */
- CL_PRIMITIVES_COUNT, /* CLIPPING_OUTPUT_PRIMITIVES */
- GS_INVOCATION_COUNT /* This one is special... */
- };
- STATIC_ASSERT(ARRAY_SIZE(target_to_register) == MAX_PIPELINE_STATISTICS);
- uint32_t reg = target_to_register[pipeline_target_to_index(target)];
- /* Gfx6 GS code counts full primitives, that is, it won't count individual
- * triangles in a triangle strip. Use CL_INVOCATION_COUNT for that.
- */
- if (devinfo->ver == 6 && target == GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB)
- reg = CL_INVOCATION_COUNT;
- assert(reg != 0);
-
- /* Emit a flush to make sure various parts of the pipeline are complete and
- * we get an accurate value
- */
- brw_emit_mi_flush(brw);
-
- brw_store_register_mem64(brw, bo, reg, idx * sizeof(uint64_t));
-}
-
-
-/**
- * Wait on the query object's BO and calculate the final result.
- */
-static void
-gfx6_queryobj_get_results(struct gl_context *ctx,
- struct brw_query_object *query)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (query->bo == NULL)
- return;
-
- uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ);
- switch (query->Base.Target) {
- case GL_TIME_ELAPSED:
- /* The query BO contains the starting and ending timestamps.
- * Subtract the two and convert to nanoseconds.
- */
- query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);
- query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result);
- break;
-
- case GL_TIMESTAMP:
- /* The query BO contains a single timestamp value in results[0]. */
- query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]);
-
- /* Ensure the scaled timestamp overflows according to
- * GL_QUERY_COUNTER_BITS
- */
- query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
- break;
-
- case GL_SAMPLES_PASSED_ARB:
- /* We need to use += rather than = here since some BLT-based operations
- * may have added additional samples to our occlusion query value.
- */
- query->Base.Result += results[1] - results[0];
- break;
-
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- if (results[0] != results[1])
- query->Base.Result = true;
- break;
-
- case GL_PRIMITIVES_GENERATED:
- case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- case GL_VERTICES_SUBMITTED_ARB:
- case GL_PRIMITIVES_SUBMITTED_ARB:
- case GL_VERTEX_SHADER_INVOCATIONS_ARB:
- case GL_GEOMETRY_SHADER_INVOCATIONS:
- case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
- case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
- case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
- case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- query->Base.Result = results[1] - results[0];
- break;
-
- case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
- query->Base.Result = check_xfb_overflow_streams(results, 1);
- break;
-
- case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
- query->Base.Result = check_xfb_overflow_streams(results, MAX_VERTEX_STREAMS);
- break;
-
- case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- query->Base.Result = (results[1] - results[0]);
- /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
- * "Invocation counter is 4 times actual. WA: SW to divide HW reported
- * PS Invocations value by 4."
- *
- * Prior to Haswell, the WM buggily counted invocations in units of
- * subspans (2x2 pixel blocks), and the CS multiplied the value by 4 to
- * compensate. With HSW the logic moved and correctly emitted the number
- * of pixel shader invocations, but the compensating multiply by 4 was
- * never removed, hence the divide here.
- */
- if (devinfo->ver == 8 || devinfo->verx10 == 75)
- query->Base.Result /= 4;
- break;
-
- default:
- unreachable("Unrecognized query target in brw_queryobj_get_results()");
- }
- brw_bo_unmap(query->bo);
-
- /* Now that we've processed the data stored in the query's buffer object,
- * we can release it.
- */
- brw_bo_unreference(query->bo);
- query->bo = NULL;
-
- query->Base.Ready = true;
-}
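Two of the fix-ups above are easy to get wrong in isolation, so here is a minimal standalone sketch (illustrative only, using the same ver/verx10 convention as the code above) of the timestamp masking and the HSW/BDW pixel-shader-invocation workaround:

#include <stdint.h>
#include <stdio.h>

/* A scaled timestamp must wrap at 2^GL_QUERY_COUNTER_BITS. */
static uint64_t mask_timestamp(uint64_t scaled_ns, unsigned counter_bits)
{
   return scaled_ns & ((1ull << counter_bits) - 1);
}

/* "WaDividePSInvocationCountBy4": HSW (verx10 == 75) and BDW (ver == 8)
 * report four times the real pixel shader invocation count.
 */
static uint64_t ps_invocations(uint64_t begin, uint64_t end, int ver, int verx10)
{
   uint64_t result = end - begin;
   if (ver == 8 || verx10 == 75)
      result /= 4;
   return result;
}

int main(void)
{
   printf("%llu\n", (unsigned long long)mask_timestamp(0xFFFFFFFFFull, 36));
   printf("%llu\n", (unsigned long long)ps_invocations(100, 900, 8, 80));
   return 0;
}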
-
-/**
- * Driver hook for glBeginQuery().
- *
- * Initializes driver structures and emits any GPU commands required to begin
- * recording data for the query.
- */
-static void
-gfx6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
-
- /* Since we're starting a new query, we need to throw away old results. */
- brw_bo_unreference(query->bo);
- query->bo =
- brw_bo_alloc(brw->bufmgr, "query results", 4096, BRW_MEMZONE_OTHER);
-
- /* For ARB_query_buffer_object: The result is not available */
- set_query_availability(brw, query, false);
-
- switch (query->Base.Target) {
- case GL_TIME_ELAPSED:
- /* For timestamp queries, we record the starting time right away so that
- * we measure the full time between BeginQuery and EndQuery. There's
- * some debate about whether this is the right thing to do. Our decision
- * is based on the following text from the ARB_timer_query extension:
- *
- * "(5) Should the extension measure total time elapsed between the full
- * completion of the BeginQuery and EndQuery commands, or just time
- * spent in the graphics library?
- *
- * RESOLVED: This extension will measure the total time elapsed
- * between the full completion of these commands. Future extensions
- * may implement a query to determine time elapsed at different stages
- * of the graphics pipeline."
- *
- * We write a starting timestamp now (at index 0). At EndQuery() time,
- * we'll write a second timestamp (at index 1), and subtract the two to
- * obtain the time elapsed. Notably, this includes time elapsed while
- * the system was doing other work, such as running other applications.
- */
- brw_write_timestamp(brw, query->bo, 0);
- break;
-
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- case GL_SAMPLES_PASSED_ARB:
- brw_write_depth_count(brw, query->bo, 0);
- break;
-
- case GL_PRIMITIVES_GENERATED:
- write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
- if (query->Base.Stream == 0)
- ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
- break;
-
- case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0);
- break;
-
- case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
- write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 0);
- break;
-
- case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
- write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 0);
- break;
-
- case GL_VERTICES_SUBMITTED_ARB:
- case GL_PRIMITIVES_SUBMITTED_ARB:
- case GL_VERTEX_SHADER_INVOCATIONS_ARB:
- case GL_GEOMETRY_SHADER_INVOCATIONS:
- case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
- case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
- case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
- case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0);
- break;
-
- default:
- unreachable("Unrecognized query target in brw_begin_query()");
- }
-}
-
-/**
- * Driver hook for glEndQuery().
- *
- * Emits GPU commands to record a final query value, ending any data capturing.
- * However, the final result isn't necessarily available until the GPU processes
- * those commands. brw_queryobj_get_results() processes the captured data to
- * produce the final result.
- */
-static void
-gfx6_end_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
-
- switch (query->Base.Target) {
- case GL_TIME_ELAPSED:
- brw_write_timestamp(brw, query->bo, 1);
- break;
-
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- case GL_SAMPLES_PASSED_ARB:
- brw_write_depth_count(brw, query->bo, 1);
- break;
-
- case GL_PRIMITIVES_GENERATED:
- write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
- if (query->Base.Stream == 0)
- ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
- break;
-
- case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
- write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1);
- break;
-
- case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
- write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 1);
- break;
-
- case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
- write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 1);
- break;
-
- /* calculate overflow here */
- case GL_VERTICES_SUBMITTED_ARB:
- case GL_PRIMITIVES_SUBMITTED_ARB:
- case GL_VERTEX_SHADER_INVOCATIONS_ARB:
- case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
- case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
- case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
- case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
- case GL_GEOMETRY_SHADER_INVOCATIONS:
- case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- emit_pipeline_stat(brw, query->bo,
- query->Base.Stream, query->Base.Target, 1);
- break;
-
- default:
- unreachable("Unrecognized query target in brw_end_query()");
- }
-
- /* The current batch contains the commands to handle EndQuery(),
- * but they won't actually execute until it is flushed.
- */
- query->flushed = false;
-
- /* For ARB_query_buffer_object: The result is now available */
- set_query_availability(brw, query, true);
-}
-
-/**
- * Flush the batch if it still references the query object BO.
- */
-static void
-flush_batch_if_needed(struct brw_context *brw, struct brw_query_object *query)
-{
- /* If the batch doesn't reference the BO, it must have been flushed
- * (for example, due to being full). Record that it's been flushed.
- */
- query->flushed = query->flushed ||
- !brw_batch_references(&brw->batch, query->bo);
-
- if (!query->flushed)
- brw_batch_flush(brw);
-}
-
-/**
- * The WaitQuery() driver hook.
- *
- * Wait for a query result to become available and return it. This is the
- * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
- */
-static void gfx6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
-
- /* If the application has requested the query result, but this batch is
- * still contributing to it, flush it now to finish that work so the
- * result will become available (eventually).
- */
- flush_batch_if_needed(brw, query);
-
- gfx6_queryobj_get_results(ctx, query);
-}
-
-/**
- * The CheckQuery() driver hook.
- *
- * Checks whether a query result is ready yet. If not, flushes.
- * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
- */
-static void gfx6_check_query(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
-
- /* If query->bo is NULL, we've already gathered the results - this is a
- * redundant CheckQuery call. Ignore it.
- */
- if (query->bo == NULL)
- return;
-
- /* From the GL_ARB_occlusion_query spec:
- *
- * "Instead of allowing for an infinite loop, performing a
- * QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
- * not ready yet on the first time it is queried. This ensures that
- * the async query will return true in finite time."
- */
- flush_batch_if_needed(brw, query);
-
- if (!brw_bo_busy(query->bo)) {
- gfx6_queryobj_get_results(ctx, query);
- }
-}
-
-static void
-gfx6_query_counter(struct gl_context *ctx, struct gl_query_object *q)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
- brw_query_counter(ctx, q);
- set_query_availability(brw, query, true);
-}
-
-/* Initialize Gfx6+-specific query object functions. */
-void gfx6_init_queryobj_functions(struct dd_function_table *functions)
-{
- functions->BeginQuery = gfx6_begin_query;
- functions->EndQuery = gfx6_end_query;
- functions->CheckQuery = gfx6_check_query;
- functions->WaitQuery = gfx6_wait_query;
- functions->QueryCounter = gfx6_query_counter;
-}
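The driver hooks above are installed through a plain table of function pointers. A reduced sketch of that dispatch pattern (hypothetical types, not the Mesa dd_function_table):

#include <stdio.h>

struct query;   /* opaque for the sketch */

/* Hypothetical reduced hook table, mirroring how gfx6_init_queryobj_functions
 * fills in the begin/end/wait entries above.
 */
struct query_hooks {
   void (*begin)(struct query *q);
   void (*end)(struct query *q);
   void (*wait)(struct query *q);
};

static void my_begin(struct query *q) { (void)q; puts("begin"); }
static void my_end(struct query *q)   { (void)q; puts("end"); }
static void my_wait(struct query *q)  { (void)q; puts("wait"); }

static void init_query_hooks(struct query_hooks *hooks)
{
   hooks->begin = my_begin;
   hooks->end   = my_end;
   hooks->wait  = my_wait;
}

int main(void)
{
   struct query_hooks hooks;
   init_query_hooks(&hooks);
   hooks.begin(NULL);
   hooks.end(NULL);
   hooks.wait(NULL);
   return 0;
}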
+++ /dev/null
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-static void
-upload_sampler_state_pointers(struct brw_context *brw)
-{
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
- VS_SAMPLER_STATE_CHANGE |
- GS_SAMPLER_STATE_CHANGE |
- PS_SAMPLER_STATE_CHANGE |
- (4 - 2));
- OUT_BATCH(brw->vs.base.sampler_offset); /* VS */
- OUT_BATCH(brw->gs.base.sampler_offset); /* GS */
- OUT_BATCH(brw->wm.base.sampler_offset);
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gfx6_sampler_state = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_SAMPLER_STATE_TABLE |
- BRW_NEW_STATE_BASE_ADDRESS,
- },
- .emit = upload_sampler_state_pointers,
-};
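The packet above follows the usual Gfx6 command layout: a header dword whose high bits carry the opcode and modify-enable flags and whose low bits carry the DWord length minus two, followed by the per-stage pointers. A hedged sketch of assembling such a header (the opcode and flag bit positions below are placeholders, not taken from the PRM):

#include <stdint.h>
#include <stdio.h>

#define SAMPLER_STATE_POINTERS_OP  0x7802u   /* placeholder opcode */
#define VS_CHANGE  (1u << 12)                /* placeholder flag bits */
#define GS_CHANGE  (1u << 9)
#define PS_CHANGE  (1u << 8)

/* "opcode << 16 | flags | (dword_length - 2)", as emitted by OUT_BATCH above. */
static uint32_t cmd_header(uint32_t opcode, uint32_t flags, unsigned dwords)
{
   return (opcode << 16) | flags | (dwords - 2);
}

int main(void)
{
   uint32_t batch[4];

   batch[0] = cmd_header(SAMPLER_STATE_POINTERS_OP,
                         VS_CHANGE | GS_CHANGE | PS_CHANGE, 4);
   batch[1] = 0x1000;   /* VS sampler state offset */
   batch[2] = 0x1100;   /* GS sampler state offset */
   batch[3] = 0x1200;   /* WM sampler state offset */

   printf("header = 0x%08x\n", (unsigned)batch[0]);
   return 0;
}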
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** \file gfx6_sol.c
- *
- * Code to initialize the binding table entries used by transform feedback.
- */
-
-#include "main/bufferobj.h"
-#include "main/macros.h"
-#include "brw_context.h"
-#include "brw_batch.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "main/transformfeedback.h"
-#include "util/u_memory.h"
-
-static void
-gfx6_update_sol_surfaces(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx);
- struct gl_transform_feedback_object *xfb_obj;
- const struct gl_transform_feedback_info *linked_xfb_info = NULL;
-
- if (xfb_active) {
- /* BRW_NEW_TRANSFORM_FEEDBACK */
- xfb_obj = ctx->TransformFeedback.CurrentObject;
- linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback;
- }
-
- for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
- const int surf_index = BRW_GFX6_SOL_BINDING_START + i;
- if (xfb_active && i < linked_xfb_info->NumOutputs) {
- unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
- unsigned buffer_offset =
- xfb_obj->Offset[buffer] / 4 +
- linked_xfb_info->Outputs[i].DstOffset;
- if (brw->programs[MESA_SHADER_GEOMETRY]) {
- brw_update_sol_surface(
- brw, xfb_obj->Buffers[buffer],
- &brw->gs.base.surf_offset[surf_index],
- linked_xfb_info->Outputs[i].NumComponents,
- linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
- } else {
- brw_update_sol_surface(
- brw, xfb_obj->Buffers[buffer],
- &brw->ff_gs.surf_offset[surf_index],
- linked_xfb_info->Outputs[i].NumComponents,
- linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
- }
- } else {
- if (!brw->programs[MESA_SHADER_GEOMETRY])
- brw->ff_gs.surf_offset[surf_index] = 0;
- else
- brw->gs.base.surf_offset[surf_index] = 0;
- }
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
-}
-
-const struct brw_tracked_state gfx6_sol_surface = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_TRANSFORM_FEEDBACK,
- },
- .emit = gfx6_update_sol_surfaces,
-};
-
-/**
- * Constructs the binding table for the WM surface state, which maps unit
- * numbers to surface state objects.
- */
-static void
-brw_gs_upload_binding_table(struct brw_context *brw)
-{
- uint32_t *bind;
- struct gl_context *ctx = &brw->ctx;
- const struct gl_program *prog;
- bool need_binding_table = false;
-
- /* We have two scenarios here:
- * 1) We are using a geometry shader only to implement transform feedback
- * for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL).
- * In this case, we only need surfaces for transform feedback in the
- * GS stage.
- * 2) We have a user-provided geometry shader. In this case we may need
- * surfaces for transform feedback and/or other stuff, like textures,
- * in the GS stage.
- */
-
- if (!brw->programs[MESA_SHADER_GEOMETRY]) {
- /* BRW_NEW_VERTEX_PROGRAM */
- prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
- if (prog) {
- /* Skip making a binding table if we don't have anything to put in it */
- const struct gl_transform_feedback_info *linked_xfb_info =
- prog->sh.LinkedTransformFeedback;
- need_binding_table = linked_xfb_info->NumOutputs > 0;
- }
- if (!need_binding_table) {
- if (brw->ff_gs.bind_bo_offset != 0) {
- brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
- brw->ff_gs.bind_bo_offset = 0;
- }
- return;
- }
-
- /* Might want to calculate nr_surfaces first, to avoid taking up so much
- * space for the binding table. Anyway, in this case we know that we only
- * use BRW_MAX_SOL_BINDINGS surfaces at most.
- */
- bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
- 32, &brw->ff_gs.bind_bo_offset);
-
- /* BRW_NEW_SURFACES */
- memcpy(bind, brw->ff_gs.surf_offset,
- BRW_MAX_SOL_BINDINGS * sizeof(uint32_t));
- } else {
- /* BRW_NEW_GEOMETRY_PROGRAM */
- prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
- if (prog) {
- /* Skip making a binding table if we don't have anything to put in it */
- struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
- const struct gl_transform_feedback_info *linked_xfb_info =
- prog->sh.LinkedTransformFeedback;
- need_binding_table = linked_xfb_info->NumOutputs > 0 ||
- prog_data->binding_table.size_bytes > 0;
- }
- if (!need_binding_table) {
- if (brw->gs.base.bind_bo_offset != 0) {
- brw->gs.base.bind_bo_offset = 0;
- brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
- }
- return;
- }
-
- /* Might want to calculate nr_surfaces first, to avoid taking up so much
- * space for the binding table.
- */
- bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES,
- 32, &brw->gs.base.bind_bo_offset);
-
- /* BRW_NEW_SURFACES */
- memcpy(bind, brw->gs.base.surf_offset,
- BRW_MAX_SURFACES * sizeof(uint32_t));
- }
-
- brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-}
-
-const struct brw_tracked_state gfx6_gs_binding_table = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_SURFACES,
- },
- .emit = brw_gs_upload_binding_table,
-};
-
-struct gl_transform_feedback_object *
-brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- CALLOC_STRUCT(brw_transform_feedback_object);
- if (!brw_obj)
- return NULL;
-
- _mesa_init_transform_feedback_object(&brw_obj->base, name);
-
- brw_obj->offset_bo =
- brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16,
- BRW_MEMZONE_OTHER);
- brw_obj->prim_count_bo =
- brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384,
- BRW_MEMZONE_OTHER);
-
- return &brw_obj->base;
-}
-
-void
-brw_delete_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- brw_bo_unreference(brw_obj->offset_bo);
- brw_bo_unreference(brw_obj->prim_count_bo);
-
- _mesa_delete_transform_feedback_object(ctx, obj);
-}
-
-/**
- * Tally the number of primitives generated so far.
- *
- * The buffer contains a series of pairs:
- * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
- * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
- *
- * For each stream, we subtract the pair of values (end - start) to get the
- * number of primitives generated during one section. We accumulate these
- * values, adding them up to get the total number of primitives generated.
- *
- * Note that we only expose one stream pre-Gfx7, so the above reduces to a
- * single (start, end) pair.
- */
-static void
-aggregate_transform_feedback_counter(
- struct brw_context *brw,
- struct brw_bo *bo,
- struct brw_transform_feedback_counter *counter)
-{
- const unsigned streams = brw->ctx.Const.MaxVertexStreams;
-
- /* If the current batch is still contributing to the number of primitives
- * generated, flush it now so the results will be present when mapped.
- */
- if (brw_batch_references(&brw->batch, bo))
- brw_batch_flush(brw);
-
- if (unlikely(brw->perf_debug && brw_bo_busy(bo)))
- perf_debug("Stalling for # of transform feedback primitives written.\n");
-
- uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ);
- prim_counts += counter->bo_start * streams;
-
- for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) {
- for (unsigned s = 0; s < streams; s++)
- counter->accum[s] += prim_counts[streams + s] - prim_counts[s];
-
- prim_counts += 2 * streams;
- }
-
- brw_bo_unmap(bo);
-
- /* We've already gathered up the old data; we can safely overwrite it now. */
- counter->bo_start = counter->bo_end = 0;
-}
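A standalone sketch of the accumulation loop above, assuming the same buffer layout: consecutive (start, end) snapshot pairs, each snapshot holding one 64-bit count per stream:

#include <stdint.h>
#include <stdio.h>

#define STREAMS 4

/* Accumulate primitives generated per stream from pairs of snapshots.
 * counts[] holds bo_entries snapshots of STREAMS values each; entries
 * (0,1), (2,3), ... form (start, end) pairs, mirroring the loop above.
 */
static void accumulate(const uint64_t *counts, unsigned bo_entries,
                       uint64_t accum[STREAMS])
{
   for (unsigned i = 0; i + 1 < bo_entries; i += 2) {
      const uint64_t *start = counts + i * STREAMS;
      const uint64_t *end   = counts + (i + 1) * STREAMS;
      for (unsigned s = 0; s < STREAMS; s++)
         accum[s] += end[s] - start[s];
   }
}

int main(void)
{
   /* Two (start, end) pairs: stream 0 gains 10 then 5, the others stay at 0. */
   const uint64_t counts[4 * STREAMS] = {
      100, 0, 0, 0,   110, 0, 0, 0,
      110, 0, 0, 0,   115, 0, 0, 0,
   };
   uint64_t accum[STREAMS] = { 0 };

   accumulate(counts, 4, accum);
   printf("stream 0: %llu\n", (unsigned long long)accum[0]);  /* 15 */
   return 0;
}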
-
-/**
- * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
- * to prim_count_bo.
- *
- * If prim_count_bo is out of space, gather up the results so far into
- * prims_generated[] and allocate a new buffer with enough space.
- *
- * The number of primitives written is used to compute the number of vertices
- * written to a transform feedback stream, which is required to implement
- * DrawTransformFeedback().
- */
-void
-brw_save_primitives_written_counters(struct brw_context *brw,
- struct brw_transform_feedback_object *obj)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct gl_context *ctx = &brw->ctx;
- const int streams = ctx->Const.MaxVertexStreams;
-
- assert(obj->prim_count_bo != NULL);
-
- /* Check if there's enough space for a new pair of four values. */
- if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >=
- obj->prim_count_bo->size) {
- aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
- &obj->previous_counter);
- aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
- &obj->counter);
- }
-
- /* Flush any drawing so that the counters have the right values. */
- brw_emit_mi_flush(brw);
-
- /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
- if (devinfo->ver >= 7) {
- for (int i = 0; i < streams; i++) {
- int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t);
- brw_store_register_mem64(brw, obj->prim_count_bo,
- GFX7_SO_NUM_PRIMS_WRITTEN(i),
- offset);
- }
- } else {
- brw_store_register_mem64(brw, obj->prim_count_bo,
- GFX6_SO_NUM_PRIMS_WRITTEN,
- obj->counter.bo_end * sizeof(uint64_t));
- }
-
- /* Update where to write data to. */
- obj->counter.bo_end++;
-}
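The Gfx7+ path above writes one 64-bit value per stream per snapshot, so the byte offset of stream i in snapshot bo_end works out as shown. A quick worked check (assuming 4 streams, as on Gfx7):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const unsigned streams = 4;   /* MaxVertexStreams on Gfx7 */
   const unsigned bo_end  = 3;   /* snapshot about to be written */

   for (unsigned i = 0; i < streams; i++) {
      unsigned offset = (streams * bo_end + i) * (unsigned)sizeof(uint64_t);
      printf("stream %u -> byte offset %u\n", i, offset);
   }
   /* Snapshot 3 therefore occupies bytes 96..127 of prim_count_bo. */
   return 0;
}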
-
-static void
-compute_vertices_written_so_far(struct brw_context *brw,
- struct brw_transform_feedback_object *obj,
- struct brw_transform_feedback_counter *counter,
- uint64_t *vertices_written)
-{
- const struct gl_context *ctx = &brw->ctx;
- unsigned vertices_per_prim = 0;
-
- switch (obj->primitive_mode) {
- case GL_POINTS:
- vertices_per_prim = 1;
- break;
- case GL_LINES:
- vertices_per_prim = 2;
- break;
- case GL_TRIANGLES:
- vertices_per_prim = 3;
- break;
- default:
- unreachable("Invalid transform feedback primitive mode.");
- }
-
- /* Get the number of primitives generated. */
- aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter);
-
- for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) {
- vertices_written[i] = vertices_per_prim * counter->accum[i];
- }
-}
-
-/**
- * Compute the number of vertices written by the last transform feedback
- * begin/end block.
- */
-static void
-compute_xfb_vertices_written(struct brw_context *brw,
- struct brw_transform_feedback_object *obj)
-{
- if (obj->vertices_written_valid || !obj->base.EndedAnytime)
- return;
-
- compute_vertices_written_so_far(brw, obj, &obj->previous_counter,
- obj->vertices_written);
- obj->vertices_written_valid = true;
-}
-
-/**
- * GetTransformFeedbackVertexCount() driver hook.
- *
- * Returns the number of vertices written to a particular stream by the last
- * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback().
- */
-GLsizei
-brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj,
- GLuint stream)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- assert(obj->EndedAnytime);
- assert(stream < ctx->Const.MaxVertexStreams);
-
- compute_xfb_vertices_written(brw, brw_obj);
- return brw_obj->vertices_written[stream];
-}
-
-void
-brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct gl_program *prog;
- const struct gl_transform_feedback_info *linked_xfb_info;
- struct gl_transform_feedback_object *xfb_obj =
- ctx->TransformFeedback.CurrentObject;
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) xfb_obj;
-
- assert(brw->screen->devinfo.ver == 6);
-
- if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) {
- /* BRW_NEW_GEOMETRY_PROGRAM */
- prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
- } else {
- /* BRW_NEW_VERTEX_PROGRAM */
- prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
- }
- linked_xfb_info = prog->sh.LinkedTransformFeedback;
-
- /* Compute the maximum number of vertices that we can write without
- * overflowing any of the buffers currently being used for feedback.
- */
- brw_obj->max_index
- = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj,
- linked_xfb_info);
-
- /* Initialize the SVBI 0 register to zero and set the maximum index. */
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
- OUT_BATCH(0); /* SVBI 0 */
- OUT_BATCH(0); /* starting index */
- OUT_BATCH(brw_obj->max_index);
- ADVANCE_BATCH();
-
- /* Initialize the rest of the unused streams to sane values. Otherwise,
- * they may indicate that there is no room to write data and prevent
- * anything from happening at all.
- */
- for (int i = 1; i < 4; i++) {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
- OUT_BATCH(i << SVB_INDEX_SHIFT);
- OUT_BATCH(0); /* starting index */
- OUT_BATCH(0xffffffff);
- ADVANCE_BATCH();
- }
-
- /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
- brw_save_primitives_written_counters(brw, brw_obj);
-
- brw_obj->primitive_mode = mode;
-}
-
-void
-brw_end_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
- if (!obj->Paused)
- brw_save_primitives_written_counters(brw, brw_obj);
-
- /* We've reached the end of a transform feedback begin/end block. This
- * means that future DrawTransformFeedback() calls will need to pick up the
- * results of the current counter, and that it's time to roll back the
- * current primitive counter to zero.
- */
- brw_obj->previous_counter = brw_obj->counter;
- brw_reset_transform_feedback_counter(&brw_obj->counter);
-
- /* EndTransformFeedback() means that we need to update the number of
- * vertices written. Since it's only necessary if DrawTransformFeedback()
- * is called and it means mapping a buffer object, we delay computing it
- * until it's absolutely necessary to try and avoid stalls.
- */
- brw_obj->vertices_written_valid = false;
-}
-
-void
-brw_pause_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
- * While this operation is paused, other transform feedback actions may
- * occur, which will contribute to the counters. We need to exclude that
- * from our counts.
- */
- brw_save_primitives_written_counters(brw, brw_obj);
-}
-
-void
-brw_resume_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- /* Reload SVBI 0 with the count of vertices written so far. */
- uint64_t svbi;
- compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi);
-
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
- OUT_BATCH(0); /* SVBI 0 */
- OUT_BATCH((uint32_t) svbi); /* starting index */
- OUT_BATCH(brw_obj->max_index);
- ADVANCE_BATCH();
-
- /* Initialize the rest of the unused streams to sane values. Otherwise,
- * they may indicate that there is no room to write data and prevent
- * anything from happening at all.
- */
- for (int i = 1; i < 4; i++) {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
- OUT_BATCH(i << SVB_INDEX_SHIFT);
- OUT_BATCH(0); /* starting index */
- OUT_BATCH(0xffffffff);
- ADVANCE_BATCH();
- }
-
- /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
- brw_save_primitives_written_counters(brw, brw_obj);
-}
+++ /dev/null
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "main/macros.h"
-#include "brw_batch.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-
-/**
- * When the GS is not in use, we assign the entire URB space to the VS. When
- * the GS is in use, we split the URB space evenly between the VS and the GS.
- * This is not ideal, but it's simple.
- *
- * URB size / 2 URB size / 2
- * _____________-______________ _____________-______________
- * / \ / \
- * +-------------------------------------------------------------+
- * | Vertex Shader Entries | Geometry Shader Entries |
- * +-------------------------------------------------------------+
- *
- * Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB.
- * (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.)
- */
-void
-gfx6_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, unsigned gs_size)
-{
- int nr_vs_entries, nr_gs_entries;
- int total_urb_size = brw->urb.size * 1024; /* in bytes */
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* Calculate how many entries fit in each stage's section of the URB */
- if (gs_present) {
- nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
- nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
- } else {
- nr_vs_entries = total_urb_size / (vs_size * 128);
- nr_gs_entries = 0;
- }
-
- /* Then clamp to the maximum allowed by the hardware */
- if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX])
- nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX];
-
- if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY])
- nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY];
-
- /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
- brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
- brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);
-
- assert(brw->urb.nr_vs_entries >=
- devinfo->urb.min_entries[MESA_SHADER_VERTEX]);
- assert(brw->urb.nr_vs_entries % 4 == 0);
- assert(brw->urb.nr_gs_entries % 4 == 0);
- assert(vs_size <= 5);
- assert(gs_size <= 5);
-
- BEGIN_BATCH(3);
- OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
- OUT_BATCH(((vs_size - 1) << GFX6_URB_VS_SIZE_SHIFT) |
- ((brw->urb.nr_vs_entries) << GFX6_URB_VS_ENTRIES_SHIFT));
- OUT_BATCH(((gs_size - 1) << GFX6_URB_GS_SIZE_SHIFT) |
- ((brw->urb.nr_gs_entries) << GFX6_URB_GS_ENTRIES_SHIFT));
- ADVANCE_BATCH();
-
- /* From the PRM Volume 2 part 1, section 1.4.7:
- *
- * Because of a urb corruption caused by allocating a previous gsunit’s
- * urb entry to vsunit software is required to send a "GS NULL
- * Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus
- * a dummy DRAW call before any case where VS will be taking over GS URB
- * space.
- *
- * It is not clear exactly what this means ("URB fence" is a command that
- * doesn't exist on Gfx6). So for now we just do a full pipeline flush as
- * a workaround.
- */
- if (brw->urb.gs_present && !gs_present)
- brw_emit_mi_flush(brw);
- brw->urb.gs_present = gs_present;
-}
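For concreteness, here is the computation above re-derived standalone for an illustrative Sandybridge GT1 case (32kB URB, GS present, VS and GS entry sizes of 2 in 128-byte units). The hardware clamp values below are placeholders rather than PRM figures:

#include <stdio.h>

#define MAX_VS_ENTRIES 128   /* placeholder clamp, not from the PRM */
#define MAX_GS_ENTRIES 256   /* placeholder clamp, not from the PRM */

static unsigned round_down_to(unsigned v, unsigned m) { return v - v % m; }

int main(void)
{
   const unsigned urb_bytes = 32 * 1024;
   const unsigned vs_size = 2, gs_size = 2;     /* 128-byte units */

   /* GS present: split the URB evenly, as in gfx6_upload_urb above. */
   unsigned nr_vs = (urb_bytes / 2) / (vs_size * 128);
   unsigned nr_gs = (urb_bytes / 2) / (gs_size * 128);

   if (nr_vs > MAX_VS_ENTRIES) nr_vs = MAX_VS_ENTRIES;
   if (nr_gs > MAX_GS_ENTRIES) nr_gs = MAX_GS_ENTRIES;

   /* Both entry counts must be multiples of 4. */
   nr_vs = round_down_to(nr_vs, 4);
   nr_gs = round_down_to(nr_gs, 4);

   printf("VS entries: %u, GS entries: %u\n", nr_vs, nr_gs);  /* 64, 64 */
   return 0;
}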
-
-static void
-upload_urb(struct brw_context *brw)
-{
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_vue_prog_data *vs_vue_prog_data =
- brw_vue_prog_data(brw->vs.base.prog_data);
- const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1);
-
- /* BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_GS_PROG_DATA */
- const bool gs_present =
- brw->ff_gs.prog_active || brw->programs[MESA_SHADER_GEOMETRY];
-
- /* When using the GS only to do transform feedback, we use the same VUE layout for
- * VS outputs and GS outputs (as it's what the SF and Clipper expect), so we
- * can simply make the GS URB entry size the same as for the VS. This may
- * technically be too large in cases where we have few vertex attributes and
- * a lot of varyings, since the VS size is determined by the larger of the
- * two. For now, it's safe.
- *
- * For user-provided GS the assumption above does not hold since the GS
- * outputs can be different from the VS outputs.
- */
- unsigned gs_size = vs_size;
- if (brw->programs[MESA_SHADER_GEOMETRY]) {
- const struct brw_vue_prog_data *gs_vue_prog_data =
- brw_vue_prog_data(brw->gs.base.prog_data);
- gs_size = gs_vue_prog_data->urb_entry_size;
- assert(gs_size >= 1);
- }
-
- gfx6_upload_urb(brw, vs_size, gs_present, gs_size);
-}
-
-const struct brw_tracked_state gfx6_urb = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_FF_GS_PROG_DATA |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = upload_urb,
-};
+++ /dev/null
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "common/intel_l3_config.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/**
- * Calculate the desired L3 partitioning based on the current state of the
- * pipeline. For now this simply returns the conservative defaults calculated
- * by get_default_l3_weights(), but we could probably do better by gathering
- * more statistics from the pipeline state (e.g. guess of expected URB usage
- * and bound surfaces), or by using feed-back from performance counters.
- */
-static struct intel_l3_weights
-get_pipeline_state_l3_weights(const struct brw_context *brw)
-{
- const struct brw_stage_state *stage_states[] = {
- [MESA_SHADER_VERTEX] = &brw->vs.base,
- [MESA_SHADER_TESS_CTRL] = &brw->tcs.base,
- [MESA_SHADER_TESS_EVAL] = &brw->tes.base,
- [MESA_SHADER_GEOMETRY] = &brw->gs.base,
- [MESA_SHADER_FRAGMENT] = &brw->wm.base,
- [MESA_SHADER_COMPUTE] = &brw->cs.base
- };
- bool needs_dc = false, needs_slm = false;
-
- for (unsigned i = 0; i < ARRAY_SIZE(stage_states); i++) {
- const struct gl_program *prog =
- brw->ctx._Shader->CurrentProgram[stage_states[i]->stage];
- const struct brw_stage_prog_data *prog_data = stage_states[i]->prog_data;
-
- needs_dc |= (prog && (prog->sh.data->NumAtomicBuffers ||
- prog->sh.data->NumShaderStorageBlocks ||
- prog->info.num_images)) ||
- (prog_data && prog_data->total_scratch);
- needs_slm |= prog_data && prog_data->total_shared;
- }
-
- return intel_get_default_l3_weights(&brw->screen->devinfo,
- needs_dc, needs_slm);
-}
-
-/**
- * Program the hardware to use the specified L3 configuration.
- */
-static void
-setup_l3_config(struct brw_context *brw, const struct intel_l3_config *cfg)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];
- const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||
- cfg->n[INTEL_L3P_ALL];
- const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||
- cfg->n[INTEL_L3P_ALL];
- const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||
- cfg->n[INTEL_L3P_ALL];
- const bool has_slm = cfg->n[INTEL_L3P_SLM];
-
- /* According to the hardware docs, the L3 partitioning can only be changed
- * while the pipeline is completely drained and the caches are flushed,
- * which involves a first PIPE_CONTROL flush which stalls the pipeline...
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DATA_CACHE_FLUSH |
- PIPE_CONTROL_CS_STALL);
-
- /* ...followed by a second pipelined PIPE_CONTROL that initiates
- * invalidation of the relevant caches. Note that because RO invalidation
- * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
- * command is processed by the CS) we cannot combine it with the previous
- * stalling flush as the hardware documentation suggests, because that
- * would cause the CS to stall on previous rendering *after* RO
- * invalidation and wouldn't prevent the RO caches from being polluted by
- * concurrent rendering before the stall completes. This intentionally
- * doesn't implement the SKL+ hardware workaround suggesting to enable CS
- * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
- * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
- * already guarantee that there is no concurrent GPGPU kernel execution
- * (see SKL HSD 2132585).
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE |
- PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_STATE_CACHE_INVALIDATE);
-
- /* Now send a third stalling flush to make sure that invalidation is
- * complete when the L3 configuration registers are modified.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DATA_CACHE_FLUSH |
- PIPE_CONTROL_CS_STALL);
-
- if (devinfo->ver >= 8) {
- assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]);
-
- const unsigned imm_data = (
- (devinfo->ver < 11 && has_slm ? GFX8_L3CNTLREG_SLM_ENABLE : 0) |
- (devinfo->ver == 11 ? GFX11_L3CNTLREG_USE_FULL_WAYS : 0) |
- SET_FIELD(cfg->n[INTEL_L3P_URB], GFX8_L3CNTLREG_URB_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_RO], GFX8_L3CNTLREG_RO_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_DC], GFX8_L3CNTLREG_DC_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_ALL], GFX8_L3CNTLREG_ALL_ALLOC));
-
- /* Set up the L3 partitioning. */
- brw_load_register_imm32(brw, GFX8_L3CNTLREG, imm_data);
- } else {
- assert(!cfg->n[INTEL_L3P_ALL]);
-
- /* When enabled, SLM only uses a portion of the L3 on half of the banks,
- * the matching space on the remaining banks has to be allocated to a
- * client (URB for all validated configurations) set to the
- * lower-bandwidth 2-bank address hashing mode.
- */
- const bool urb_low_bw = has_slm && devinfo->platform != INTEL_PLATFORM_BYT;
- assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);
-
- /* Minimum number of ways that can be allocated to the URB. */
- const unsigned n0_urb = (devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0);
- assert(cfg->n[INTEL_L3P_URB] >= n0_urb);
-
- BEGIN_BATCH(7);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2));
-
- /* Demote any clients with no ways assigned to LLC. */
- OUT_BATCH(GFX7_L3SQCREG1);
- OUT_BATCH((devinfo->platform == INTEL_PLATFORM_HSW ? HSW_L3SQCREG1_SQGHPCI_DEFAULT :
- devinfo->platform == INTEL_PLATFORM_BYT ? VLV_L3SQCREG1_SQGHPCI_DEFAULT :
- IVB_L3SQCREG1_SQGHPCI_DEFAULT) |
- (has_dc ? 0 : GFX7_L3SQCREG1_CONV_DC_UC) |
- (has_is ? 0 : GFX7_L3SQCREG1_CONV_IS_UC) |
- (has_c ? 0 : GFX7_L3SQCREG1_CONV_C_UC) |
- (has_t ? 0 : GFX7_L3SQCREG1_CONV_T_UC));
-
- /* Set up the L3 partitioning. */
- OUT_BATCH(GFX7_L3CNTLREG2);
- OUT_BATCH((has_slm ? GFX7_L3CNTLREG2_SLM_ENABLE : 0) |
- SET_FIELD(cfg->n[INTEL_L3P_URB] - n0_urb, GFX7_L3CNTLREG2_URB_ALLOC) |
- (urb_low_bw ? GFX7_L3CNTLREG2_URB_LOW_BW : 0) |
- SET_FIELD(cfg->n[INTEL_L3P_ALL], GFX7_L3CNTLREG2_ALL_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_RO], GFX7_L3CNTLREG2_RO_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_DC], GFX7_L3CNTLREG2_DC_ALLOC));
- OUT_BATCH(GFX7_L3CNTLREG3);
- OUT_BATCH(SET_FIELD(cfg->n[INTEL_L3P_IS], GFX7_L3CNTLREG3_IS_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_C], GFX7_L3CNTLREG3_C_ALLOC) |
- SET_FIELD(cfg->n[INTEL_L3P_T], GFX7_L3CNTLREG3_T_ALLOC));
-
- ADVANCE_BATCH();
-
- if (can_do_hsw_l3_atomics(brw->screen)) {
- /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
- * them disabled to avoid crashing the system hard.
- */
- BEGIN_BATCH(5);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2));
- OUT_BATCH(HSW_SCRATCH1);
- OUT_BATCH(has_dc ? 0 : HSW_SCRATCH1_L3_ATOMIC_DISABLE);
- OUT_BATCH(HSW_ROW_CHICKEN3);
- OUT_BATCH(REG_MASK(HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE) |
- (has_dc ? 0 : HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE));
- ADVANCE_BATCH();
- }
- }
-}
-
-/**
- * Update the URB size in the context state for the specified L3
- * configuration.
- */
-static void
-update_urb_size(struct brw_context *brw, const struct intel_l3_config *cfg)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const unsigned sz = intel_get_l3_config_urb_size(devinfo, cfg);
-
- if (brw->urb.size != sz) {
- brw->urb.size = sz;
- brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
-
- /* If we change the total URB size, reset the individual stage sizes to
- * zero so that, even if there is no URB size change, gfx7_upload_urb
- * still re-emits 3DSTATE_URB_*.
- */
- brw->urb.vsize = 0;
- brw->urb.gsize = 0;
- brw->urb.hsize = 0;
- brw->urb.dsize = 0;
- }
-}
-
-void
-brw_emit_l3_state(struct brw_context *brw)
-{
- const struct intel_l3_weights w = get_pipeline_state_l3_weights(brw);
- const float dw = intel_diff_l3_weights(w, intel_get_l3_config_weights(brw->l3.config));
- /* The distance between any two compatible weight vectors cannot exceed two
- * due to the triangle inequality.
- */
- const float large_dw_threshold = 2.0;
- /* Somewhat arbitrary, simply makes sure that there will be no repeated
- * transitions to the same L3 configuration, could probably do better here.
- */
- const float small_dw_threshold = 0.5;
- /* If we're emitting a new batch the caches should already be clean and the
- * transition should be relatively cheap, so it shouldn't hurt much to use
- * the smaller threshold. Otherwise use the larger threshold so that we
- * only reprogram the L3 mid-batch if the most recently programmed
- * configuration is incompatible with the current pipeline state.
- */
- const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
- small_dw_threshold : large_dw_threshold);
-
- if (dw > dw_threshold && can_do_pipelined_register_writes(brw->screen)) {
- const struct intel_l3_config *const cfg =
- intel_get_l3_config(&brw->screen->devinfo, w);
-
- setup_l3_config(brw, cfg);
- update_urb_size(brw, cfg);
- brw->l3.config = cfg;
-
- if (INTEL_DEBUG(DEBUG_L3)) {
- fprintf(stderr, "L3 config transition (%f > %f): ", dw, dw_threshold);
- intel_dump_l3_config(cfg, stderr);
- }
- }
-}
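The decision above boils down to: compute a distance between the desired and currently-programmed weight vectors, then compare it against a threshold that is small at the top of a batch and large mid-batch. A hedged sketch of that control flow with a stand-in distance function (the real metric lives in intel_l3_config.c):

#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#define N_PARTITIONS 4

/* Stand-in for intel_diff_l3_weights(): any metric obeying the triangle
 * inequality works for the thresholding idea; this one is illustrative.
 */
static float weight_distance(const float a[N_PARTITIONS], const float b[N_PARTITIONS])
{
   float d = 0.0f;
   for (int i = 0; i < N_PARTITIONS; i++)
      d += fabsf(a[i] - b[i]);
   return d;
}

static bool should_reprogram_l3(const float wanted[N_PARTITIONS],
                                const float current[N_PARTITIONS],
                                bool new_batch)
{
   /* Cheap to switch at the top of a batch, expensive mid-batch. */
   const float threshold = new_batch ? 0.5f : 2.0f;
   return weight_distance(wanted, current) > threshold;
}

int main(void)
{
   const float current[N_PARTITIONS] = { 0.5f, 0.5f, 0.0f, 0.0f };
   const float wanted[N_PARTITIONS]  = { 0.3f, 0.3f, 0.4f, 0.0f };

   printf("mid-batch: %d, new batch: %d\n",
          should_reprogram_l3(wanted, current, false),
          should_reprogram_l3(wanted, current, true));
   return 0;
}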
-
-const struct brw_tracked_state gfx7_l3_state = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_CS_PROG_DATA |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_emit_l3_state
-};
-
-/**
- * Hack to restore the default L3 configuration.
- *
- * This will be called at the end of every batch in order to reset the L3
- * configuration to the default values for the time being until the kernel is
- * fixed. Until kernel commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b
- * (included in v4.1) we would set the MI_RESTORE_INHIBIT bit when submitting
- * batch buffers for the default context used by the DDX, which meant that any
- * context state changed by the GL would leak into the DDX, the assumption
- * being that the DDX would initialize any state it cares about manually. The
- * DDX is however not careful enough to program an L3 configuration
- * explicitly, and it makes assumptions about it (URB size) which won't
- * hold, causing it to misrender if we let our L3 set-up leak into the DDX.
- *
- * Since v4.1 of the Linux kernel the default context is saved and restored
- * normally, so it's far less likely for our L3 programming to interfere with
- * other contexts -- In fact restoring the default L3 configuration at the end
- * of the batch will be redundant most of the time. A kind of state leak is
- * still possible though if the context making assumptions about L3 state is
- * created immediately after our context was active (e.g. without the DDX
- * default context being scheduled in between) because at present the DRM
- * doesn't fully initialize the contents of newly created contexts and instead
- * sets the MI_RESTORE_INHIBIT flag causing it to inherit the state from the
- * last active context.
- *
- * It's possible to realize such a scenario if, say, an X server (or a GL
- * application using an outdated non-L3-aware Mesa version) is started while
- * another GL application is running and happens to have modified the L3
- * configuration, or if no X server is running at all and a GL application
- * using a non-L3-aware Mesa version is started after another GL application
- * ran and modified the L3 configuration -- The latter situation can actually
- * be reproduced easily on IVB in our CI system.
- */
-void
-gfx7_restore_default_l3_config(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- const struct intel_l3_config *const cfg = intel_get_default_l3_config(devinfo);
-
- if (cfg != brw->l3.config &&
- can_do_pipelined_register_writes(brw->screen)) {
- setup_l3_config(brw, cfg);
- update_urb_size(brw, cfg);
- brw->l3.config = cfg;
- }
-}
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file gfx7_sol_state.c
- *
- * Controls the stream output logic (SOL) stage of the gfx7 hardware, which is
- * used to implement GL_EXT_transform_feedback.
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "main/transformfeedback.h"
-
-void
-gfx7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- assert(brw->screen->devinfo.ver == 7);
-
- /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
- brw_save_primitives_written_counters(brw, brw_obj);
-
- /* Reset the SO buffer offsets to 0. */
- if (!can_do_pipelined_register_writes(brw->screen)) {
- brw_batch_flush(brw);
- brw->batch.needs_sol_reset = true;
- } else {
- for (int i = 0; i < 4; i++) {
- brw_load_register_imm32(brw, GFX7_SO_WRITE_OFFSET(i), 0);
- }
- }
-
- brw_obj->primitive_mode = mode;
-}
-
-void
-gfx7_end_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- /* After EndTransformFeedback, it's likely that the client program will try
- * to draw using the contents of the transform feedback buffer as vertex
- * input. In order for this to work, we need to flush the data through at
- * least the GS stage of the pipeline, and flush out the render cache. For
- * simplicity, just do a full flush.
- */
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
- if (!obj->Paused)
- brw_save_primitives_written_counters(brw, brw_obj);
-
- /* We've reached the end of a transform feedback begin/end block. This
- * means that future DrawTransformFeedback() calls will need to pick up the
- * results of the current counter, and that it's time to roll back the
- * current primitive counter to zero.
- */
- brw_obj->previous_counter = brw_obj->counter;
- brw_reset_transform_feedback_counter(&brw_obj->counter);
-
- /* EndTransformFeedback() means that we need to update the number of
- * vertices written. Since it's only necessary if DrawTransformFeedback()
- * is called and it means mapping a buffer object, we delay computing it
- * until it's absolutely necessary to try and avoid stalls.
- */
- brw_obj->vertices_written_valid = false;
-}
-
-void
-gfx7_pause_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- /* Flush any drawing so that the counters have the right values. */
- brw_emit_mi_flush(brw);
-
- assert(brw->screen->devinfo.ver == 7);
-
- /* Save the SOL buffer offset register values. */
- for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GFX7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
- ADVANCE_BATCH();
- }
-
- /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
- * While this operation is paused, other transform feedback actions may
- * occur, which will contribute to the counters. We need to exclude that
- * from our counts.
- */
- brw_save_primitives_written_counters(brw, brw_obj);
-}
-
-void
-gfx7_resume_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- assert(brw->screen->devinfo.ver == 7);
-
- /* Reload the SOL buffer offset registers. */
- for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GFX7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
- ADVANCE_BATCH();
- }
-
- /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
- brw_save_primitives_written_counters(brw, brw_obj);
-}
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/macros.h"
-#include "brw_batch.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-
-#include "common/intel_l3_config.h"
-
-/**
- * The following diagram shows how we partition the URB:
- *
- * 16kB or 32kB Rest of the URB space
- * __________-__________ _________________-_________________
- * / \ / \
- * +-------------------------------------------------------------+
- * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB |
- * | Constants | Entries |
- * +-------------------------------------------------------------+
- *
- * Notably, push constants must be stored at the beginning of the URB
- * space, while entries can be stored anywhere. Ivybridge and Haswell
- * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
- * doubles this (32kB).
- *
- * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
- * sized) in increments of 1kB. Haswell GT3 requires them to be located and
- * sized in increments of 2kB.
- *
- * Currently we split the constant buffer space evenly among whatever stages
- * are active. This is probably not ideal, but simple.
- *
- * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
- * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
- * Haswell GT3 has 512kB of URB space.
- *
- * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
- * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
- */
-static void
-gfx7_allocate_push_constants(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* BRW_NEW_GEOMETRY_PROGRAM */
- bool gs_present = brw->programs[MESA_SHADER_GEOMETRY];
-
- /* BRW_NEW_TESS_PROGRAMS */
- bool tess_present = brw->programs[MESA_SHADER_TESS_EVAL];
-
- unsigned avail_size = 16;
- unsigned multiplier = devinfo->max_constant_urb_size_kb / 16;
-
- int stages = 2 + gs_present + 2 * tess_present;
-
- /* Divide up the available space equally between stages. Because we
- * round down (using floor division), there may be some left over
- * space. We allocate that to the pixel shader stage.
- */
- unsigned size_per_stage = avail_size / stages;
-
- unsigned vs_size = size_per_stage;
- unsigned hs_size = tess_present ? size_per_stage : 0;
- unsigned ds_size = tess_present ? size_per_stage : 0;
- unsigned gs_size = gs_present ? size_per_stage : 0;
- unsigned fs_size = avail_size - size_per_stage * (stages - 1);
-
- gfx7_emit_push_constant_state(brw, multiplier * vs_size,
- multiplier * hs_size, multiplier * ds_size,
- multiplier * gs_size, multiplier * fs_size);
-
- /* From p115 of the Ivy Bridge PRM (3.2.1.4 3DSTATE_PUSH_CONSTANT_ALLOC_VS):
- *
- * Programming Restriction:
- *
- * The 3DSTATE_CONSTANT_VS must be reprogrammed prior to the next
- * 3DPRIMITIVE command after programming the
- * 3DSTATE_PUSH_CONSTANT_ALLOC_VS.
- *
- * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_*
- * commands.
- */
- brw->vs.base.push_constants_dirty = true;
- brw->tcs.base.push_constants_dirty = true;
- brw->tes.base.push_constants_dirty = true;
- brw->gs.base.push_constants_dirty = true;
- brw->wm.base.push_constants_dirty = true;
-}
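Aside (illustrative sketch, not part of the removed file): the even split above can be checked with a tiny standalone C program, assuming the 16 kB base budget and a multiplier of 1 (IVB / HSW GT1-GT2).

#include <stdbool.h>
#include <stdio.h>

/* Standalone sketch of the arithmetic in gfx7_allocate_push_constants():
 * the 16 kB budget is divided evenly among the active stages (floor
 * division) and the leftover is handed to the fragment shader.
 */
static void
split_push_constants(bool gs_present, bool tess_present)
{
   const unsigned avail_size = 16;   /* kB, before the GT3 multiplier */
   const int stages = 2 + gs_present + 2 * tess_present;
   const unsigned per_stage = avail_size / stages;
   const unsigned fs_size = avail_size - per_stage * (stages - 1);

   printf("stages=%d per_stage=%u kB fs=%u kB\n", stages, per_stage, fs_size);
}

int main(void)
{
   split_push_constants(false, false); /* VS+FS:      8 kB each            */
   split_push_constants(true, false);  /* VS+GS+FS:   5, 5, FS gets 6      */
   split_push_constants(true, true);   /* all stages: 3 each, FS gets 4    */
   return 0;
}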
-
-void
-gfx7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
- unsigned hs_size, unsigned ds_size,
- unsigned gs_size, unsigned fs_size)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- unsigned offset = 0;
-
- /* From the SKL PRM, Workarounds section (#878):
- *
- * Push constant buffer corruption possible. WA: Insert 2 zero-length
- * PushConst_PS before every intended PushConst_PS update, issue a
- * NULLPRIM after each of the zero len PC update to make sure CS commits
- * them.
- *
- * This workaround is attempting to solve a pixel shader push constant
- * synchronization issue.
- *
- * There's an unpublished WA that involves re-emitting
- * 3DSTATE_PUSH_CONSTANT_ALLOC_PS for every 500-ish 3DSTATE_CONSTANT_PS
- * packets. Since our counting methods may not be reliable due to
- * context-switching and pre-emption, we instead choose to approximate this
- * behavior by re-emitting the packet at the top of the batch.
- */
- if (brw->ctx.NewDriverState == BRW_NEW_BATCH) {
- /* SKL GT2 and GLK 2x6 have reliably demonstrated this issue thus far.
- * We've also seen some intermittent failures from SKL GT4 and BXT in
- * the past.
- */
- if (devinfo->platform != INTEL_PLATFORM_SKL &&
- devinfo->platform != INTEL_PLATFORM_BXT &&
- devinfo->platform != INTEL_PLATFORM_GLK)
- return;
- }
-
- BEGIN_BATCH(10);
- OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
- OUT_BATCH(vs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
- offset += vs_size;
-
- OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_HS << 16 | (2 - 2));
- OUT_BATCH(hs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
- offset += hs_size;
-
- OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_DS << 16 | (2 - 2));
- OUT_BATCH(ds_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
- offset += ds_size;
-
- OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2));
- OUT_BATCH(gs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
- offset += gs_size;
-
- OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
- OUT_BATCH(fs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
- ADVANCE_BATCH();
-
- /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
- *
- * A PIPE_CONTROL command with the CS Stall bit set must be programmed
- * in the ring after this instruction.
- *
- * No such restriction exists for Haswell or Baytrail.
- */
- if (devinfo->verx10 <= 70 && devinfo->platform != INTEL_PLATFORM_BYT)
- gfx7_emit_cs_stall_flush(brw);
-}
-
-const struct brw_tracked_state gfx7_push_constant_space = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_CONTEXT |
- BRW_NEW_BATCH | /* Push constant workaround */
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_TESS_PROGRAMS,
- },
- .emit = gfx7_allocate_push_constants,
-};
-
-static void
-upload_urb(struct brw_context *brw)
-{
- /* BRW_NEW_VS_PROG_DATA */
- const struct brw_vue_prog_data *vs_vue_prog_data =
- brw_vue_prog_data(brw->vs.base.prog_data);
- const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1);
- /* BRW_NEW_GS_PROG_DATA */
- const bool gs_present = brw->gs.base.prog_data;
- /* BRW_NEW_TES_PROG_DATA */
- const bool tess_present = brw->tes.base.prog_data;
-
- gfx7_upload_urb(brw, vs_size, gs_present, tess_present);
-}
-
-void
-gfx7_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, bool tess_present)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- /* BRW_NEW_{VS,TCS,TES,GS}_PROG_DATA */
- struct brw_vue_prog_data *prog_data[4] = {
- [MESA_SHADER_VERTEX] =
- brw_vue_prog_data(brw->vs.base.prog_data),
- [MESA_SHADER_TESS_CTRL] =
- tess_present ? brw_vue_prog_data(brw->tcs.base.prog_data) : NULL,
- [MESA_SHADER_TESS_EVAL] =
- tess_present ? brw_vue_prog_data(brw->tes.base.prog_data) : NULL,
- [MESA_SHADER_GEOMETRY] =
- gs_present ? brw_vue_prog_data(brw->gs.base.prog_data) : NULL,
- };
-
- unsigned entry_size[4];
- entry_size[MESA_SHADER_VERTEX] = vs_size;
- for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) {
- entry_size[i] = prog_data[i] ? prog_data[i]->urb_entry_size : 1;
- }
-
- /* If we're just switching between programs with the same URB requirements,
- * skip the rest of the logic.
- */
- if (brw->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
- brw->urb.gs_present == gs_present &&
- brw->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
- brw->urb.tess_present == tess_present &&
- brw->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
- brw->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
- return;
- }
- brw->urb.vsize = entry_size[MESA_SHADER_VERTEX];
- brw->urb.gs_present = gs_present;
- brw->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
- brw->urb.tess_present = tess_present;
- brw->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
- brw->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
-
- unsigned entries[4];
- unsigned start[4];
- bool constrained;
- intel_get_urb_config(devinfo, brw->l3.config,
- tess_present, gs_present, entry_size,
- entries, start, NULL, &constrained);
-
- if (devinfo->platform == INTEL_PLATFORM_IVB)
- gfx7_emit_vs_workaround_flush(brw);
-
- BEGIN_BATCH(8);
- for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
- assert(devinfo->ver != 10 || entry_size[i] % 3);
- OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
- OUT_BATCH(entries[i] |
- ((entry_size[i] - 1) << GFX7_URB_ENTRY_SIZE_SHIFT) |
- (start[i] << GFX7_URB_STARTING_ADDRESS_SHIFT));
- }
- ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gfx7_urb = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_URB_SIZE |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_TCS_PROG_DATA |
- BRW_NEW_TES_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = upload_urb,
-};
+++ /dev/null
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-#include "brw_mipmap_tree.h"
-#include "brw_fbo.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-#include "brw_wm.h"
-#include "main/framebuffer.h"
-
-/**
- * Should we set the PMA FIX ENABLE bit?
- *
- * To avoid unnecessary depth-related stalls, we need to set this bit.
- * However, there is a very complicated formula which governs when it
- * is legal to do so. This function computes that.
- *
- * See the documentation for the CACHE_MODE_1 register, bit 11.
- */
-static bool
-pma_fix_enable(const struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FS_PROG_DATA */
- const struct brw_wm_prog_data *wm_prog_data =
- brw_wm_prog_data(brw->wm.base.prog_data);
- /* _NEW_BUFFERS */
- struct brw_renderbuffer *depth_irb =
- brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-
- /* 3DSTATE_WM::ForceThreadDispatch is never used. */
- const bool wm_force_thread_dispatch = false;
-
- /* 3DSTATE_RASTER::ForceSampleCount is never used. */
- const bool raster_force_sample_count_nonzero = false;
-
- /* _NEW_BUFFERS:
- * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
- * 3DSTATE_DEPTH_BUFFER::HIZ Enable
- */
- const bool hiz_enabled = depth_irb && brw_renderbuffer_has_hiz(depth_irb);
-
- /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). */
- const bool edsc_not_preps = !wm_prog_data->early_fragment_tests;
-
- /* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */
- const bool pixel_shader_valid = true;
-
- /* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
- * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
- * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
- * 3DSTATE_WM_HZ_OP::StencilBufferClear)
- *
- * HiZ operations are done outside of the normal state upload, so they're
- * definitely not happening now.
- */
- const bool in_hiz_op = false;
-
- /* _NEW_DEPTH:
- * DEPTH_STENCIL_STATE::DepthTestEnable
- */
- const bool depth_test_enabled = depth_irb && ctx->Depth.Test;
-
- /* _NEW_DEPTH:
- * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
- * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE.
- */
- const bool depth_writes_enabled = brw_depth_writes_enabled(brw);
-
- /* _NEW_STENCIL:
- * !DEPTH_STENCIL_STATE::Stencil Buffer Write Enable ||
- * !3DSTATE_DEPTH_BUFFER::Stencil Buffer Enable ||
- * !3DSTATE_STENCIL_BUFFER::Stencil Buffer Enable
- */
- const bool stencil_writes_enabled = brw->stencil_write_enabled;
-
- /* 3DSTATE_PS_EXTRA::Pixel Shader Computed Depth Mode != PSCDEPTH_OFF */
- const bool ps_computes_depth =
- wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
-
- /* BRW_NEW_FS_PROG_DATA: 3DSTATE_PS_EXTRA::PixelShaderKillsPixels
- * BRW_NEW_FS_PROG_DATA: 3DSTATE_PS_EXTRA::oMask Present to RenderTarget
- * _NEW_MULTISAMPLE: 3DSTATE_PS_BLEND::AlphaToCoverageEnable
- * _NEW_COLOR: 3DSTATE_PS_BLEND::AlphaTestEnable
- * _NEW_BUFFERS: 3DSTATE_PS_BLEND::AlphaTestEnable
- * 3DSTATE_PS_BLEND::AlphaToCoverageEnable
- *
- * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false.
- * 3DSTATE_WM::ForceKillPix != ForceOff is always true.
- */
- const bool kill_pixel =
- wm_prog_data->uses_kill ||
- wm_prog_data->uses_omask ||
- _mesa_is_alpha_test_enabled(ctx) ||
- _mesa_is_alpha_to_coverage_enabled(ctx);
-
- /* The big formula in CACHE_MODE_1::NP PMA FIX ENABLE. */
- return !wm_force_thread_dispatch &&
- !raster_force_sample_count_nonzero &&
- hiz_enabled &&
- edsc_not_preps &&
- pixel_shader_valid &&
- !in_hiz_op &&
- depth_test_enabled &&
- (ps_computes_depth ||
- (kill_pixel && (depth_writes_enabled || stencil_writes_enabled)));
-}
-
-void
-gfx8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits)
-{
- /* If we haven't actually changed the value, bail now to avoid unnecessary
- * pipeline stalls and register writes.
- */
- if (brw->pma_stall_bits == pma_stall_bits)
- return;
-
- brw->pma_stall_bits = pma_stall_bits;
-
- /* According to the PIPE_CONTROL documentation, software should emit a
- * PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set prior
- * to the LRI. If stencil buffer writes are enabled, then a Render Cache
- * Flush is also necessary.
- */
- const uint32_t render_cache_flush =
- brw->stencil_write_enabled ? PIPE_CONTROL_RENDER_TARGET_FLUSH : 0;
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- render_cache_flush);
-
- /* CACHE_MODE_1 is a non-privileged register. */
- brw_load_register_imm32(brw, GFX7_CACHE_MODE_1,
- GFX8_HIZ_PMA_MASK_BITS |
- pma_stall_bits );
-
- /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
- * Flush bits is often necessary. We do it regardless because it's easier.
- * The render cache flush is also necessary if stencil writes are enabled.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- render_cache_flush);
-
-}
-
-static void
-gfx8_emit_pma_stall_workaround(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t bits = 0;
-
- if (devinfo->ver >= 9)
- return;
-
- if (pma_fix_enable(brw))
- bits |= GFX8_HIZ_NP_PMA_FIX_ENABLE | GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE;
-
- gfx8_write_pma_stall_bits(brw, bits);
-}
-
-const struct brw_tracked_state gfx8_pma_fix = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_COLOR |
- _NEW_DEPTH |
- _NEW_MULTISAMPLE |
- _NEW_STENCIL,
- .brw = BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA,
- },
- .emit = gfx8_emit_pma_stall_workaround
-};
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_multisample_state.h"
-
-/**
- * 3DSTATE_SAMPLE_PATTERN
- */
-void
-gfx8_emit_3dstate_sample_pattern(struct brw_context *brw)
-{
- BEGIN_BATCH(9);
- OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
-
- /* 16x MSAA */
- OUT_BATCH(brw_multisample_positions_16x[0]); /* positions 3, 2, 1, 0 */
- OUT_BATCH(brw_multisample_positions_16x[1]); /* positions 7, 6, 5, 4 */
- OUT_BATCH(brw_multisample_positions_16x[2]); /* positions 11, 10, 9, 8 */
- OUT_BATCH(brw_multisample_positions_16x[3]); /* positions 15, 14, 13, 12 */
-
- /* 8x MSAA */
- OUT_BATCH(brw_multisample_positions_8x[1]); /* sample positions 7654 */
- OUT_BATCH(brw_multisample_positions_8x[0]); /* sample positions 3210 */
-
- /* 4x MSAA */
- OUT_BATCH(brw_multisample_positions_4x);
-
- /* 1x and 2x MSAA */
- OUT_BATCH(brw_multisample_positions_1x_2x);
- ADVANCE_BATCH();
-}
+++ /dev/null
-/*
- * Copyright (c) 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-/** @file hsw_queryobj.c
- *
- * Support for query buffer objects (GL_ARB_query_buffer_object) on Haswell+.
- */
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-
-/*
- * GPR0 = 80 * GPR0;
- */
-static void
-mult_gpr0_by_80(struct brw_context *brw)
-{
- static const uint32_t maths[] = {
- MI_MATH_ALU2(LOAD, SRCA, R0),
- MI_MATH_ALU2(LOAD, SRCB, R0),
- MI_MATH_ALU0(ADD),
- MI_MATH_ALU2(STORE, R1, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R1),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(ADD),
- MI_MATH_ALU2(STORE, R1, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R1),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(ADD),
- MI_MATH_ALU2(STORE, R1, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R1),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(ADD),
- /* GPR1 = 16 * GPR0 */
- MI_MATH_ALU2(STORE, R1, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R1),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(ADD),
- MI_MATH_ALU2(STORE, R2, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R2),
- MI_MATH_ALU2(LOAD, SRCB, R2),
- MI_MATH_ALU0(ADD),
- /* GPR2 = 64 * GPR0 */
- MI_MATH_ALU2(STORE, R2, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R1),
- MI_MATH_ALU2(LOAD, SRCB, R2),
- MI_MATH_ALU0(ADD),
- /* GPR0 = 80 * GPR0 */
- MI_MATH_ALU2(STORE, R0, ACCU),
- };
-
- BEGIN_BATCH(1 + ARRAY_SIZE(maths));
- OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
- for (int m = 0; m < ARRAY_SIZE(maths); m++)
- OUT_BATCH(maths[m]);
-
- ADVANCE_BATCH();
-}
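Aside (sketch only, not in the original file): the MI_MATH ALU has no multiply instruction, so mult_gpr0_by_80() builds 80*x out of repeated additions; the same arithmetic in plain C:

#include <stdint.h>
#include <assert.h>

/* Plain-C mirror of the ALU program above: 80*x is assembled as
 * 16*x + 64*x using only doubling (x + x).
 */
static uint64_t
mult_by_80(uint64_t r0)
{
   uint64_t r1 = r0 + r0;  /* 2x  */
   r1 += r1;               /* 4x  */
   r1 += r1;               /* 8x  */
   r1 += r1;               /* 16x */
   uint64_t r2 = r1 + r1;  /* 32x */
   r2 += r2;               /* 64x */
   return r1 + r2;         /* 16x + 64x = 80x */
}

int main(void)
{
   assert(mult_by_80(3) == 240);
   return 0;
}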
-
-/*
- * GPR0 = GPR0 & ((1ull << n) - 1);
- */
-static void
-keep_gpr0_lower_n_bits(struct brw_context *brw, uint32_t n)
-{
- static const uint32_t maths[] = {
- MI_MATH_ALU2(LOAD, SRCA, R0),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(AND),
- MI_MATH_ALU2(STORE, R0, ACCU),
- };
-
- assert(n < 64);
- brw_load_register_imm64(brw, HSW_CS_GPR(1), (1ull << n) - 1);
-
- BEGIN_BATCH(1 + ARRAY_SIZE(maths));
- OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
- for (int m = 0; m < ARRAY_SIZE(maths); m++)
- OUT_BATCH(maths[m]);
-
- ADVANCE_BATCH();
-}
-
-/*
- * GPR0 = GPR0 << 30;
- */
-static void
-shl_gpr0_by_30_bits(struct brw_context *brw)
-{
- /* First we mask 34 bits of GPR0 to prevent overflow */
- keep_gpr0_lower_n_bits(brw, 34);
-
- static const uint32_t shl_maths[] = {
- MI_MATH_ALU2(LOAD, SRCA, R0),
- MI_MATH_ALU2(LOAD, SRCB, R0),
- MI_MATH_ALU0(ADD),
- MI_MATH_ALU2(STORE, R0, ACCU),
- };
-
- const uint32_t outer_count = 5;
- const uint32_t inner_count = 6;
- STATIC_ASSERT(outer_count * inner_count == 30);
- const uint32_t cmd_len = 1 + inner_count * ARRAY_SIZE(shl_maths);
- const uint32_t batch_len = cmd_len * outer_count;
-
- BEGIN_BATCH(batch_len);
-
- /* We'll emit 5 commands, each shifting GPR0 left by 6 bits, for a total of
- * 30 left shifts.
- */
- for (int o = 0; o < outer_count; o++) {
- /* Submit one MI_MATH to shift left by 6 bits */
- OUT_BATCH(HSW_MI_MATH | (cmd_len - 2));
- for (int i = 0; i < inner_count; i++)
- for (int m = 0; m < ARRAY_SIZE(shl_maths); m++)
- OUT_BATCH(shl_maths[m]);
- }
-
- ADVANCE_BATCH();
-}
-
-/*
- * GPR0 = GPR0 >> 2;
- *
- * Note that the upper 30 bits of GPR0 are lost!
- */
-static void
-shr_gpr0_by_2_bits(struct brw_context *brw)
-{
- shl_gpr0_by_30_bits(brw);
- brw_load_register_reg(brw, HSW_CS_GPR(0), HSW_CS_GPR(0) + 4);
- brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
-}
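Aside (sketch only): there is no right-shift either, so shr_gpr0_by_2_bits() left-shifts by 30 and then moves the upper 32-bit half of GPR0 into the lower half; for values that fit in 34 bits this is exactly a right shift by 2:

#include <stdint.h>
#include <assert.h>

/* Equivalent of keep_gpr0_lower_n_bits(34) + shl_gpr0_by_30_bits() plus
 * the register moves above: the high dword of (x << 30) is x >> 2.
 */
static uint32_t
shr_by_2(uint64_t gpr0)
{
   uint64_t masked  = gpr0 & ((1ull << 34) - 1);
   uint64_t shifted = masked << 30;
   return (uint32_t)(shifted >> 32);
}

int main(void)
{
   assert(shr_by_2(100) == 25);
   assert(shr_by_2(7) == 1);
   return 0;
}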
-
-/*
- * GPR0 = (GPR0 == 0) ? 0 : 1;
- */
-static void
-gpr0_to_bool(struct brw_context *brw)
-{
- static const uint32_t maths[] = {
- MI_MATH_ALU2(LOAD, SRCA, R0),
- MI_MATH_ALU1(LOAD0, SRCB),
- MI_MATH_ALU0(ADD),
- MI_MATH_ALU2(STOREINV, R0, ZF),
- MI_MATH_ALU2(LOAD, SRCA, R0),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(AND),
- MI_MATH_ALU2(STORE, R0, ACCU),
- };
-
- brw_load_register_imm64(brw, HSW_CS_GPR(1), 1ull);
-
- BEGIN_BATCH(1 + ARRAY_SIZE(maths));
- OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
- for (int m = 0; m < ARRAY_SIZE(maths); m++)
- OUT_BATCH(maths[m]);
-
- ADVANCE_BATCH();
-}
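Aside (sketch only): gpr0_to_bool() normalizes GPR0 to exactly 0 or 1 by writing the inverted zero flag and masking it with GPR1 == 1; in plain C:

#include <stdint.h>
#include <assert.h>

/* Plain-C equivalent of the ALU program above: the ADD with 0 sets the
 * zero flag, STOREINV writes its inverse, and the AND with 1 clamps the
 * stored value to 0 or 1.
 */
static uint64_t
gpr0_as_bool(uint64_t r0)
{
   uint64_t not_zero = (r0 == 0) ? 0 : ~0ull;
   return not_zero & 1ull;
}

int main(void)
{
   assert(gpr0_as_bool(0) == 0);
   assert(gpr0_as_bool(42) == 1);
   return 0;
}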
-
-static void
-load_overflow_data_to_cs_gprs(struct brw_context *brw,
- struct brw_query_object *query,
- int idx)
-{
- int offset = idx * sizeof(uint64_t) * 4;
-
- brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, offset);
-
- offset += sizeof(uint64_t);
- brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, offset);
-
- offset += sizeof(uint64_t);
- brw_load_register_mem64(brw, HSW_CS_GPR(3), query->bo, offset);
-
- offset += sizeof(uint64_t);
- brw_load_register_mem64(brw, HSW_CS_GPR(4), query->bo, offset);
-}
-
-/*
- * R3 = R4 - R3;
- * R1 = R2 - R1;
- * R1 = R3 - R1;
- * R0 = R0 | R1;
- */
-static void
-calc_overflow_for_stream(struct brw_context *brw)
-{
- static const uint32_t maths[] = {
- MI_MATH_ALU2(LOAD, SRCA, R4),
- MI_MATH_ALU2(LOAD, SRCB, R3),
- MI_MATH_ALU0(SUB),
- MI_MATH_ALU2(STORE, R3, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R2),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(SUB),
- MI_MATH_ALU2(STORE, R1, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R3),
- MI_MATH_ALU2(LOAD, SRCB, R1),
- MI_MATH_ALU0(SUB),
- MI_MATH_ALU2(STORE, R1, ACCU),
- MI_MATH_ALU2(LOAD, SRCA, R1),
- MI_MATH_ALU2(LOAD, SRCB, R0),
- MI_MATH_ALU0(OR),
- MI_MATH_ALU2(STORE, R0, ACCU),
- };
-
- BEGIN_BATCH(1 + ARRAY_SIZE(maths));
- OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
- for (int m = 0; m < ARRAY_SIZE(maths); m++)
- OUT_BATCH(maths[m]);
-
- ADVANCE_BATCH();
-}
-
-static void
-calc_overflow_to_gpr0(struct brw_context *brw, struct brw_query_object *query,
- int count)
-{
- brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull);
-
- for (int i = 0; i < count; i++) {
- load_overflow_data_to_cs_gprs(brw, query, i);
- calc_overflow_for_stream(brw);
- }
-}
-
-/*
- * Take a query and calculate whether there was overflow during transform
- * feedback. Store the result in the gpr0 register.
- */
-void
-hsw_overflow_result_to_gpr0(struct brw_context *brw,
- struct brw_query_object *query,
- int count)
-{
- calc_overflow_to_gpr0(brw, query, count);
- gpr0_to_bool(brw);
-}
-
-static void
-hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
- struct gl_buffer_object *buf, intptr_t offset,
- GLenum pname, GLenum ptype)
-{
- struct brw_context *brw = brw_context(ctx);
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- assert(query->bo);
- assert(pname != GL_QUERY_TARGET);
-
- if (pname == GL_QUERY_RESULT_AVAILABLE) {
- /* The query result availability is stored at offset 2 * sizeof(uint64_t) in the buffer. */
- brw_load_register_mem64(brw,
- HSW_CS_GPR(0),
- query->bo,
- 2 * sizeof(uint64_t));
- return;
- }
-
- if (pname == GL_QUERY_RESULT) {
- /* Since GL_QUERY_RESULT_NO_WAIT wasn't used, they want us to stall to
- * make sure the query is available.
- */
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
- }
-
- if (query->Base.Target == GL_TIMESTAMP) {
- brw_load_register_mem64(brw,
- HSW_CS_GPR(0),
- query->bo,
- 0 * sizeof(uint64_t));
- } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB
- || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) {
- /* Don't do anything in advance here, since the math for this is a little
- * more complex.
- */
- } else {
- brw_load_register_mem64(brw,
- HSW_CS_GPR(1),
- query->bo,
- 0 * sizeof(uint64_t));
- brw_load_register_mem64(brw,
- HSW_CS_GPR(2),
- query->bo,
- 1 * sizeof(uint64_t));
-
- BEGIN_BATCH(5);
- OUT_BATCH(HSW_MI_MATH | (5 - 2));
-
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
- OUT_BATCH(MI_MATH_ALU0(SUB));
- OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
-
- ADVANCE_BATCH();
- }
-
- switch (query->Base.Target) {
- case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
- * "Invocation counter is 4 times actual. WA: SW to divide HW reported
- * PS Invocations value by 4."
- *
- * Prior to Haswell, invocation count was counted by the WM, and it
- * buggily counted invocations in units of subspans (2x2 unit). To get the
- * correct value, the CS multiplied this by 4. With HSW the logic moved,
- * and correctly emitted the number of pixel shader invocations, but
- * whoever moved it forgot to undo the multiply by 4.
- */
- if (devinfo->ver == 8 || devinfo->platform == INTEL_PLATFORM_HSW)
- shr_gpr0_by_2_bits(brw);
- break;
- case GL_TIME_ELAPSED:
- case GL_TIMESTAMP:
- mult_gpr0_by_80(brw);
- if (query->Base.Target == GL_TIMESTAMP) {
- keep_gpr0_lower_n_bits(brw, 36);
- }
- break;
- case GL_ANY_SAMPLES_PASSED:
- case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
- gpr0_to_bool(brw);
- break;
- case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
- hsw_overflow_result_to_gpr0(brw, query, 1);
- break;
- case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
- hsw_overflow_result_to_gpr0(brw, query, MAX_VERTEX_STREAMS);
- break;
- }
-}
-
-/*
- * Store immediate data into the user buffer using the requested size.
- */
-static void
-store_query_result_imm(struct brw_context *brw, struct brw_bo *bo,
- uint32_t offset, GLenum ptype, uint64_t imm)
-{
- switch (ptype) {
- case GL_INT:
- case GL_UNSIGNED_INT:
- brw_store_data_imm32(brw, bo, offset, imm);
- break;
- case GL_INT64_ARB:
- case GL_UNSIGNED_INT64_ARB:
- brw_store_data_imm64(brw, bo, offset, imm);
- break;
- default:
- unreachable("Unexpected result type");
- }
-}
-
-static void
-set_predicate(struct brw_context *brw, struct brw_bo *query_bo)
-{
- brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
-
- /* Load query availability into SRC0 */
- brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo,
- 2 * sizeof(uint64_t));
-
- /* predicate = !(query_availability == 0); */
- BEGIN_BATCH(1);
- OUT_BATCH(GFX7_MI_PREDICATE |
- MI_PREDICATE_LOADOP_LOADINV |
- MI_PREDICATE_COMBINEOP_SET |
- MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
- ADVANCE_BATCH();
-}
-
-/*
- * Store data from the register into the user buffer using the requested size.
- * The write also enables the predication to prevent writing the result if the
- * query has not finished yet.
- */
-static void
-store_query_result_reg(struct brw_context *brw, struct brw_bo *bo,
- uint32_t offset, GLenum ptype, uint32_t reg,
- const bool pipelined)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- uint32_t cmd_size = devinfo->ver >= 8 ? 4 : 3;
- uint32_t dwords = (ptype == GL_INT || ptype == GL_UNSIGNED_INT) ? 1 : 2;
- assert(devinfo->ver >= 6);
-
- BEGIN_BATCH(dwords * cmd_size);
- for (int i = 0; i < dwords; i++) {
- OUT_BATCH(MI_STORE_REGISTER_MEM |
- (pipelined ? MI_STORE_REGISTER_MEM_PREDICATE : 0) |
- (cmd_size - 2));
- OUT_BATCH(reg + 4 * i);
- if (devinfo->ver >= 8) {
- OUT_RELOC64(bo, RELOC_WRITE, offset + 4 * i);
- } else {
- OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + 4 * i);
- }
- }
- ADVANCE_BATCH();
-}
-
-static void
-hsw_store_query_result(struct gl_context *ctx, struct gl_query_object *q,
- struct gl_buffer_object *buf, intptr_t offset,
- GLenum pname, GLenum ptype)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_query_object *query = (struct brw_query_object *)q;
- struct brw_buffer_object *bo = brw_buffer_object(buf);
- const bool pipelined = brw_is_query_pipelined(query);
-
- if (pname == GL_QUERY_TARGET) {
- store_query_result_imm(brw, bo->buffer, offset, ptype,
- query->Base.Target);
- return;
- } else if (pname == GL_QUERY_RESULT_AVAILABLE && !pipelined) {
- store_query_result_imm(brw, bo->buffer, offset, ptype, 1ull);
- } else if (query->bo) {
- * The query bo is still around. Therefore, we:
- *
- * 1. Compute the current result in GPR0
- * 2. Set the command streamer predicate based on query availability
- * 3. (With predication) Write GPR0 to the requested buffer
- */
- hsw_result_to_gpr0(ctx, query, buf, offset, pname, ptype);
- if (pipelined)
- set_predicate(brw, query->bo);
- store_query_result_reg(brw, bo->buffer, offset, ptype, HSW_CS_GPR(0),
- pipelined);
- } else {
- /* The query bo is gone, so the query must have been processed into
- * client memory. In this case we can fill the buffer location with the
- * requested data using MI_STORE_DATA_IMM.
- */
- switch (pname) {
- case GL_QUERY_RESULT_AVAILABLE:
- store_query_result_imm(brw, bo->buffer, offset, ptype, 1ull);
- break;
- case GL_QUERY_RESULT_NO_WAIT:
- case GL_QUERY_RESULT:
- store_query_result_imm(brw, bo->buffer, offset, ptype,
- q->Result);
- break;
- default:
- unreachable("Unexpected result type");
- }
- }
-
-}
-
-/* Initialize hsw+-specific query object functions. */
-void hsw_init_queryobj_functions(struct dd_function_table *functions)
-{
- gfx6_init_queryobj_functions(functions);
- functions->StoreQueryResult = hsw_store_query_result;
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * An implementation of the transform feedback driver hooks for Haswell
- * and later hardware. This uses MI_MATH to compute the number of vertices
- * written (for use by DrawTransformFeedback()) without any CPU<->GPU
- * synchronization which could stall.
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "main/transformfeedback.h"
-
-/**
- * We store several values in obj->prim_count_bo:
- *
- * [4x 32-bit values]: Final Number of Vertices Written
- * [4x 32-bit values]: Tally of Primitives Written So Far
- * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
- *
- * The first set of values is used by DrawTransformFeedback(), which
- * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
- * an indirect draw. The other values are just temporary storage.
- */
-
-#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
-#define START_OFFSET (TALLY_OFFSET * 2)
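Aside (layout note, not part of the removed file): with BRW_MAX_XFB_STREAMS == 4, as the "4x" entries in the comment above indicate, the offsets work out as follows:

/* prim_count_bo layout implied by the macros above:
 *
 *   bytes  0..15 : final vertex counts             (4 x uint32_t)
 *   bytes 16..31 : running primitive tallies       (4 x uint32_t)  TALLY_OFFSET = 16
 *   bytes 32..63 : starting SO_NUM_PRIMS_WRITTEN   (4 x uint64_t)  START_OFFSET = 32
 */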
-
-/**
- * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
- * to prim_count_bo.
- */
-static void
-save_prim_start_values(struct brw_context *brw,
- struct brw_transform_feedback_object *obj)
-{
- /* Flush any drawing so that the counters have the right values. */
- brw_emit_mi_flush(brw);
-
- /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
- for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
- brw_store_register_mem64(brw, obj->prim_count_bo,
- GFX7_SO_NUM_PRIMS_WRITTEN(i),
- START_OFFSET + i * sizeof(uint64_t));
- }
-}
-
-/**
- * Compute the number of primitives written during our most recent
- * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
- * minus the stashed "start" value), and add it to our running tally.
- *
- * If \p finalize is true, also compute the number of vertices written
- * (by multiplying by the number of vertices per primitive), and store
- * that to the "final" location.
- *
- * Otherwise, just overwrite the old tally with the new one.
- */
-static void
-tally_prims_written(struct brw_context *brw,
- struct brw_transform_feedback_object *obj,
- bool finalize)
-{
- /* Flush any drawing so that the counters have the right values. */
- brw_emit_mi_flush(brw);
-
- for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
- /* GPR0 = Tally */
- brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
- brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
- TALLY_OFFSET + i * sizeof(uint32_t));
- if (!obj->base.Paused) {
- /* GPR1 = Start Snapshot */
- brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
- START_OFFSET + i * sizeof(uint64_t));
- /* GPR2 = Ending Snapshot */
- brw_load_register_reg64(brw, HSW_CS_GPR(2),
- GFX7_SO_NUM_PRIMS_WRITTEN(i));
-
- BEGIN_BATCH(9);
- OUT_BATCH(HSW_MI_MATH | (9 - 2));
- /* GPR1 = GPR2 (End) - GPR1 (Start) */
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
- OUT_BATCH(MI_MATH_ALU0(SUB));
- OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
- /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
- OUT_BATCH(MI_MATH_ALU0(ADD));
- OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
- ADVANCE_BATCH();
- }
-
- if (!finalize) {
- /* Write back the new tally */
- brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
- TALLY_OFFSET + i * sizeof(uint32_t));
- } else {
- /* Convert the number of primitives to the number of vertices. */
- if (obj->primitive_mode == GL_LINES) {
- /* Double R0 (R0 = R0 + R0) */
- BEGIN_BATCH(5);
- OUT_BATCH(HSW_MI_MATH | (5 - 2));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
- OUT_BATCH(MI_MATH_ALU0(ADD));
- OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
- ADVANCE_BATCH();
- } else if (obj->primitive_mode == GL_TRIANGLES) {
- /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
- BEGIN_BATCH(9);
- OUT_BATCH(HSW_MI_MATH | (9 - 2));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
- OUT_BATCH(MI_MATH_ALU0(ADD));
- OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
- OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
- OUT_BATCH(MI_MATH_ALU0(ADD));
- OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
- ADVANCE_BATCH();
- }
- /* Store it to the final result */
- brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
- i * sizeof(uint32_t));
- }
- }
-}
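Aside (sketch only): the finalize path above converts the primitive tally to a vertex count by repeated addition; the same step in plain C:

#include <stdint.h>

/* Plain-C equivalent of the finalize branch in tally_prims_written():
 * GL_LINES doubles the tally, GL_TRIANGLES triples it, and anything
 * else (points) is passed through unchanged.
 */
static uint32_t
prims_to_vertices(uint32_t prims, int is_lines, int is_triangles)
{
   if (is_lines)
      return prims + prims;           /* R0 = R0 + R0            */
   if (is_triangles)
      return prims + (prims + prims); /* R1 = 2*R0, R0 = R0 + R1 */
   return prims;
}

int main(void)
{
   return prims_to_vertices(5, 0, 1) == 15 ? 0 : 1;
}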
-
-/**
- * BeginTransformFeedback() driver hook.
- */
-void
-hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- brw_obj->primitive_mode = mode;
-
- /* Reset the SO buffer offsets to 0. */
- if (devinfo->ver >= 8) {
- brw_obj->zero_offsets = true;
- } else {
- BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
- OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
- for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
- OUT_BATCH(GFX7_SO_WRITE_OFFSET(i));
- OUT_BATCH(0);
- }
- ADVANCE_BATCH();
- }
-
- /* Zero out the initial tallies */
- brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull);
- brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);
-
- /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
- save_prim_start_values(brw, brw_obj);
-}
-
-/**
- * PauseTransformFeedback() driver hook.
- */
-void
-hsw_pause_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->platform == INTEL_PLATFORM_HSW) {
- /* Flush any drawing so that the counters have the right values. */
- brw_emit_mi_flush(brw);
-
- /* Save the SOL buffer offset register values. */
- for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GFX7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
- ADVANCE_BATCH();
- }
- }
-
- /* Add any primitives written to our tally */
- tally_prims_written(brw, brw_obj, false);
-}
-
-/**
- * ResumeTransformFeedback() driver hook.
- */
-void
-hsw_resume_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->platform == INTEL_PLATFORM_HSW) {
- /* Reload the SOL buffer offset registers. */
- for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GFX7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t));
- ADVANCE_BATCH();
- }
- }
-
- /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
- save_prim_start_values(brw, brw_obj);
-}
-
-/**
- * EndTransformFeedback() driver hook.
- */
-void
-hsw_end_transform_feedback(struct gl_context *ctx,
- struct gl_transform_feedback_object *obj)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_transform_feedback_object *brw_obj =
- (struct brw_transform_feedback_object *) obj;
-
- /* Add any primitives written to our tally, convert it from the number
- * of primitives written to the number of vertices written, and store
- * it in the "final" location in the buffer which DrawTransformFeedback()
- * will use as the vertex count.
- */
- tally_prims_written(brw, brw_obj, true);
-}
+++ /dev/null
-/*
- * Copyright © 2014 NVIDIA Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef LIBDRM_LIBDRM_H
-#define LIBDRM_LIBDRM_H
-
-#include "util/macros.h"
-
-
-#include <sys/mman.h>
-
-#if defined(ANDROID) && !defined(__LP64__)
-/* 32-bit needs mmap64 for 64-bit offsets */
-# define drm_mmap(addr, length, prot, flags, fd, offset) \
- mmap64(addr, length, prot, flags, fd, offset)
-
-# define drm_munmap(addr, length) \
- munmap(addr, length)
-
-#else
-
-/* assume large file support exists */
-# define drm_mmap(addr, length, prot, flags, fd, offset) \
- mmap(addr, length, prot, flags, fd, offset)
-
-
-static inline int drm_munmap(void *addr, size_t length)
-{
- /* Copied from configure code generated by AC_SYS_LARGEFILE */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \
- (((off_t) 1 << 31) << 31))
- STATIC_ASSERT(LARGE_OFF_T % 2147483629 == 721 &&
- LARGE_OFF_T % 2147483647 == 1);
-#undef LARGE_OFF_T
-
- return munmap(addr, length);
-}
-#endif
-
-#endif
+++ /dev/null
-# Copyright © 2017 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-files_i965 = files(
- 'brw_binding_tables.c',
- 'brw_blorp.c',
- 'brw_blorp.h',
- 'brw_bufmgr.c',
- 'brw_bufmgr.h',
- 'brw_clear.c',
- 'brw_clip.c',
- 'brw_compute.c',
- 'brw_conditional_render.c',
- 'brw_context.c',
- 'brw_context.h',
- 'brw_cs.c',
- 'brw_cs.h',
- 'brw_curbe.c',
- 'brw_defines.h',
- 'brw_disk_cache.c',
- 'brw_draw.c',
- 'brw_draw.h',
- 'brw_draw_upload.c',
- 'brw_ff_gs.c',
- 'brw_ff_gs.h',
- 'brw_formatquery.c',
- 'brw_generate_mipmap.c',
- 'brw_gs.c',
- 'brw_gs.h',
- 'brw_gs_surface_state.c',
- 'brw_link.cpp',
- 'brw_meta_util.c',
- 'brw_meta_util.h',
- 'brw_misc_state.c',
- 'brw_multisample_state.h',
- 'brw_nir_uniforms.cpp',
- 'brw_object_purgeable.c',
- 'brw_pipe_control.c',
- 'brw_performance_query.c',
- 'brw_program.c',
- 'brw_program.h',
- 'brw_program_binary.c',
- 'brw_program_cache.c',
- 'brw_primitive_restart.c',
- 'brw_queryobj.c',
- 'brw_reset.c',
- 'brw_sf.c',
- 'brw_state.h',
- 'brw_state_upload.c',
- 'brw_structs.h',
- 'brw_surface_formats.c',
- 'brw_sync.c',
- 'brw_tcs.c',
- 'brw_tcs_surface_state.c',
- 'brw_tes.c',
- 'brw_tes_surface_state.c',
- 'brw_urb.c',
- 'brw_util.c',
- 'brw_util.h',
- 'brw_vs.c',
- 'brw_vs.h',
- 'brw_vs_surface_state.c',
- 'brw_wm.c',
- 'brw_wm.h',
- 'brw_wm_surface_state.c',
- 'gfx4_blorp_exec.h',
- 'gfx6_clip_state.c',
- 'gfx6_constant_state.c',
- 'gfx6_multisample_state.c',
- 'gfx6_queryobj.c',
- 'gfx6_sampler_state.c',
- 'gfx6_sol.c',
- 'gfx6_urb.c',
- 'gfx7_l3_state.c',
- 'gfx7_sol_state.c',
- 'gfx7_urb.c',
- 'gfx8_depth_state.c',
- 'gfx8_multisample_state.c',
- 'hsw_queryobj.c',
- 'hsw_sol.c',
- 'brw_batch.c',
- 'brw_batch.h',
- 'brw_blit.c',
- 'brw_blit.h',
- 'brw_buffer_objects.c',
- 'brw_buffer_objects.h',
- 'brw_buffers.c',
- 'brw_buffers.h',
- 'brw_copy_image.c',
- 'brw_extensions.c',
- 'brw_fbo.c',
- 'brw_fbo.h',
- 'brw_image.h',
- 'brw_mipmap_tree.c',
- 'brw_mipmap_tree.h',
- 'brw_pixel_bitmap.c',
- 'brw_pixel.c',
- 'brw_pixel_copy.c',
- 'brw_pixel_draw.c',
- 'brw_pixel.h',
- 'brw_pixel_read.c',
- 'brw_screen.c',
- 'brw_screen.h',
- 'brw_state.c',
- 'brw_tex.c',
- 'brw_tex_copy.c',
- 'brw_tex.h',
- 'brw_tex_image.c',
- 'brw_tex_obj.h',
- 'brw_tex_validate.c',
- 'brw_upload.c',
- 'libdrm_macros.h',
-)
-
-i965_per_hw_ver_libs = []
-foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '110']
- i965_per_hw_ver_libs += static_library(
- 'i965_per_hw_ver@0@'.format(v),
- ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c',
- 'genX_state_upload.c', gen_xml_pack],
- include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_dri_common],
- c_args : [
- no_override_init_args, c_sse2_args,
- '-DGFX_VERx10=@0@'.format(v),
- ],
- gnu_symbol_visibility : 'hidden',
- dependencies : [dep_libdrm, idep_nir_headers, idep_mesautil],
- )
-endforeach
-
-
-libi965 = static_library(
- 'i965',
- [files_i965, ir_expression_operation_h],
- include_directories : [
- inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_dri_common,
- ],
- c_args : [no_override_init_args, c_sse2_args],
- cpp_args : [c_sse2_args],
- gnu_symbol_visibility : 'hidden',
- link_with : [
- i965_per_hw_ver_libs, libintel_dev, libisl, libintel_compiler, libblorp,
- libintel_perf
- ],
- dependencies : [
- dep_libdrm, dep_valgrind, idep_libintel_common, idep_nir_headers, idep_genxml,
- idep_xmlconfig,
- ],
-)
_dri_drivers = []
_dri_link = []
-if with_dri_i965
- subdir('i965')
- _dri_drivers += libi965
- _dri_link += 'i965_dri.so'
-endif
if _dri_drivers != []
libmesa_dri_drivers = shared_library(