# "*** Color comparison failed"
KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth24_stencil8,Fail
-# "MESA: error: ir3_ra() failed!"
-KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2,Fail
-KHR-GLES31.core.arrays_of_arrays.InteractionArgumentAliasing5,Fail
-KHR-GLES31.core.arrays_of_arrays.InteractionArgumentAliasing6,Fail
-
# "The values of resultStd[i] & 0xFFFFFFFE and resultFma[i] & 0xFFFFFFFE and resultCPU[i] & 0xFFFFFFFE are not bitwise equal for i = 0..99 "
KHR-GLES31.core.gpu_shader5.fma_precision_float,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
dEQP-VK.api.object_management.alloc_callback_fail.device,Fail
dEQP-VK.api.object_management.alloc_callback_fail.device_group,Fail
-# "MESA: error: ir3_ra() failed!"
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
-dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite,Fail
-dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store,Fail
-
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail
dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.7,Fail
-# "MESA: error: ir3_ra() failed!
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
-dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail
-
# "deqp-vk: ../src/freedreno/vulkan/tu_cs.h:186: tu_cs_reserve: Assertion `tu_cs_get_space(cs) >= reserved_size' failed."
# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8841
dEQP-VK.spirv_assembly.instruction.compute.opphi.wide,Crash
dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_geom,Fail
dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_vert,Fail
-# "MESA: error: ir3_ra() failed!"
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
-# Needs spilling, or maybe some scheduling (though throwing a bit of nir_move/sink
-# at it didn't help).
-dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_inner_stride,Fail
-dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_outer_stride,Fail
-dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_strides,Fail
-
dEQP-VK.texture.filtering.2d.formats.d24_unorm_s8_uint_stencil.nearest,Fail
dEQP-VK.texture.filtering.2d_array.formats.d24_unorm_s8_uint_stencil.d24_unorm_s8_uint_stencil_nearest,Fail
dEQP-VK.texture.filtering.cube.formats.d24_unorm_s8_uint_stencil.nearest,Fail
# Broken on all drivers: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4582
dEQP-VK.wsi.display_control.register_device_event,Fail
-# "MESA: error: ir3_ra() failed!"
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
-dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3_comp_access,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3_comp_access_store_cols,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3,Fail
-dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3_store_cols,Fail
-dEQP-VK.ssbo.layout.random.all_shared_buffer.5,Fail
-dEQP-VK.ssbo.layout.random.nested_structs_arrays.0,Fail
-dEQP-VK.ssbo.layout.random.nested_structs_arrays.17,Fail
-dEQP-VK.ssbo.layout.random.scalar.19,Fail
-
bypass-dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
bypass-dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.7,Fail
bypass-dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail
#include "ir3_shader.h"
/*
- * This pass does one thing so far:
+ * This pass does two things:
*
* 1. Calculates the maximum register pressure. To do this, we need to use the
- * exact same technique that RA uses for combining meta_split instructions
- * with their sources, so that our calculation agrees with RA.
- *
- * It will also optionally spill registers once that's implemented.
+ * exact same technique that RA uses for combining meta_split instructions
+ * with their sources, so that our calculation agrees with RA.
+ * 2. Spills when the register pressure exceeds a limit calculated by RA.
+ * The implementation is based on "Register Spilling and Live-Range Splitting
+ * for SSA-Form Programs" by Braun and Hack, although again care has to be
+ * taken to handle combining split/collect instructions.
*/
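+/* Spilled values live in private memory (pvtmem): each spilled def gets a
+ * byte-offset "spill slot", and spills/reloads are emitted as
+ * OPC_SPILL_MACRO/OPC_RELOAD_MACRO instructions addressed relative to a
+ * reserved always-zero base register (see add_base_reg() below).
+ */
+
+/* What an interval's value is currently mapped to: an SSA def, an immediate,
+ * or a const-file register. Immediates/consts can show up here as parallel
+ * copy sources.
+ */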
+struct reg_or_immed {
+ unsigned flags;
+ union {
+ struct ir3_register *def;
+ uint32_t uimm;
+ unsigned const_num;
+ };
+};
+
struct ra_spill_interval {
struct ir3_reg_interval interval;
+
+ struct rb_node node;
+ struct rb_node half_node;
+
+ /* The current SSA value/const/immed this source is mapped to. */
+ struct reg_or_immed dst;
+
+ /* When computing use distances we use the distance relative to the start
+ * of the block. So, for example, a value that's defined in cycle 5 of the
+ * block and used 6 cycles later will always have a next_use_distance of 11
+ * until we reach that use.
+ */
+ unsigned next_use_distance;
+
+ /* Whether this value was reloaded and therefore doesn't need to be
+ * spilled again. Corresponds to the S set in the paper.
+ */
+ bool already_spilled;
+
+ /* We need to add sources early for accounting purposes, but we have to
+ * insert the reload code for them last. Keep track of whether this interval
+ * needs to be reloaded later.
+ */
+ bool needs_reload;
+
+ /* Keep track of whether this interval currently can't be spilled because:
+ * - It or one of its children is a source and we're making space for
+ * sources.
+ * - It is a destination and we're making space for destinations.
+ */
+ bool cant_spill;
+};
+
+struct ra_spill_block_state {
+ unsigned *next_use_end;
+ unsigned *next_use_start;
+
+ unsigned cycles;
+
+ /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
+ * This map only contains values which we didn't spill, so it also serves as
+ * a record of the new live-out set for this block.
+ */
+ struct hash_table *remap;
+
+ /* For blocks whose successors are visited first (i.e. loop backedges), which
+ * values should be live at the end.
+ */
+ BITSET_WORD *live_out;
+
+ bool visited;
};
struct ra_spill_ctx {
struct ir3_reg_ctx reg_ctx;
- struct ra_spill_interval *intervals;
+ struct ra_spill_interval **intervals;
+ unsigned intervals_count;
+
+ /* rb tree of live intervals that we can spill, ordered by next-use distance.
+ * full_live_intervals contains the full+half intervals in the merged_regs
+ * case. We use this list to determine what to spill.
+ */
+ struct rb_tree full_live_intervals;
+ struct rb_tree half_live_intervals;
struct ir3_pressure cur_pressure, max_pressure;
+ struct ir3_pressure limit_pressure;
+
+ /* When spilling, we need to reserve a register to serve as the zero'd
+ * "base". For simplicity we reserve a register at the beginning so that it's
+ * always available.
+ */
+ struct ir3_register *base_reg;
+
+ /* Current pvtmem offset in bytes. */
+ unsigned spill_slot;
+
struct ir3_liveness *live;
const struct ir3_compiler *compiler;
+
+ struct ra_spill_block_state *blocks;
+
+ bool spilling;
+
+ bool merged_regs;
};
static void
-ra_spill_interval_init(struct ra_spill_interval *interval,
- struct ir3_register *reg)
+add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
- ir3_reg_interval_init(&interval->interval, reg);
+ struct ir3_block *start = ir3_start_block(ir);
+
+ /* We need to stick it after any meta instructions which need to be first. */
+ struct ir3_instruction *after = NULL;
+ foreach_instr (instr, &start->instr_list) {
+ if (instr->opc != OPC_META_INPUT &&
+ instr->opc != OPC_META_TEX_PREFETCH) {
+ after = instr;
+ break;
+ }
+ }
+
+ struct ir3_instruction *mov = create_immed(start, 0);
+
+ if (after)
+ ir3_instr_move_before(mov, after);
+
+ ctx->base_reg = mov->dsts[0];
+
+ /* We don't create an interval, etc. for the base reg, so just lower the
+ * register pressure limit to account for it. We assume it's always
+ * available for simplicity.
+ */
+ ctx->limit_pressure.full -= reg_size(ctx->base_reg);
+}
+
+/* Compute the number of cycles per instruction used for next-use-distance
+ * analysis. This is just approximate, obviously.
+ */
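+/* For example, an ALU instruction with (rpt3) counts as 4 cycles, while most
+ * meta instructions count as 0.
+ */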
+static unsigned
+instr_cycles(struct ir3_instruction *instr)
+{
+ if (instr->opc == OPC_META_PARALLEL_COPY) {
+ unsigned cycles = 0;
+ for (unsigned i = 0; i < instr->dsts_count; i++) {
+ if (!instr->srcs[i]->def ||
+ instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
+ cycles += reg_elems(instr->srcs[i]);
+ }
+ }
+
+ return cycles;
+ }
+
+ if (instr->opc == OPC_META_COLLECT) {
+ unsigned cycles = 0;
+ for (unsigned i = 0; i < instr->srcs_count; i++) {
+ if (!instr->srcs[i]->def ||
+ instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
+ cycles++;
+ }
+ }
+
+ return cycles;
+ }
+
+ if (is_meta(instr))
+ return 0;
+
+ return 1 + instr->repeat;
+}
+
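+/* Walk a block backwards to compute next-use distances for every def/src in
+ * it, then propagate the start-of-block distances into each predecessor's
+ * next_use_end. Returns true if any predecessor changed, so that the caller
+ * can iterate to a fixed point (needed because of loop back-edges).
+ */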
+static bool
+compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
+ unsigned *tmp_next_use)
+{
+ struct ra_spill_block_state *state = &ctx->blocks[block->index];
+ memcpy(tmp_next_use, state->next_use_end,
+ ctx->live->definitions_count * sizeof(*tmp_next_use));
+
+ unsigned cycle = state->cycles;
+ foreach_instr_rev (instr, &block->instr_list) {
+ ra_foreach_dst (dst, instr) {
+ dst->next_use = tmp_next_use[dst->name];
+ }
+
+ ra_foreach_src (src, instr) {
+ src->next_use = tmp_next_use[src->def->name];
+ }
+
+ cycle -= instr_cycles(instr);
+
+ if (instr->opc == OPC_META_PARALLEL_COPY) {
+ ra_foreach_src_n (src, i, instr) {
+ if (src->def->merge_set == instr->dsts[i]->merge_set &&
+ src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) {
+ tmp_next_use[src->def->name] =
+ tmp_next_use[instr->dsts[i]->name];
+ } else {
+ tmp_next_use[src->def->name] = cycle;
+ }
+ }
+ } else if (instr->opc != OPC_META_PHI) {
+ ra_foreach_src (src, instr) {
+ tmp_next_use[src->def->name] = cycle;
+ }
+ }
+
+ ra_foreach_dst (dst, instr) {
+ tmp_next_use[dst->name] = UINT_MAX;
+ }
+ }
+
+ memcpy(state->next_use_start, tmp_next_use,
+ ctx->live->definitions_count * sizeof(*tmp_next_use));
+
+ bool progress = false;
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ const struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];
+
+ /* Add a large-enough distance in front of edges exiting the loop so that
+ * variables that are live-through the loop but not used inside it are
+ * prioritized for spilling, as per the paper. This just needs to be
+ * larger than the longest path through the loop.
+ */
+ bool loop_exit = pred->loop_depth < block->loop_depth;
+ unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);
+
+ for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
+ if (state->next_use_start[j] < UINT_MAX &&
+ state->next_use_start[j] + block_distance <
+ pred_state->next_use_end[j]) {
+ pred_state->next_use_end[j] = state->next_use_start[j] +
+ block_distance;
+ progress = true;
+ }
+ }
+
+ foreach_instr (phi, &block->instr_list) {
+ if (phi->opc != OPC_META_PHI)
+ break;
+ if (!phi->srcs[i]->def)
+ continue;
+ unsigned src = phi->srcs[i]->def->name;
+ if (phi->dsts[0]->next_use < UINT_MAX &&
+ phi->dsts[0]->next_use + block_distance <
+ pred_state->next_use_end[src]) {
+ pred_state->next_use_end[src] = phi->dsts[0]->next_use +
+ block_distance;
+ progress = true;
+ }
+ }
+ }
+
+ return progress;
}
static void
-ra_pressure_add(struct ir3_pressure *pressure,
- struct ra_spill_interval *interval)
+compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
{
- unsigned size = reg_size(interval->interval.reg);
- if (interval->interval.reg->flags & IR3_REG_SHARED)
- pressure->shared += size;
- else if (interval->interval.reg->flags & IR3_REG_HALF)
- pressure->half += size;
- else
- pressure->full += size;
+ for (unsigned i = 0; i < ctx->live->block_count; i++) {
+ ctx->blocks[i].next_use_start =
+ ralloc_array(ctx, unsigned, ctx->live->definitions_count);
+ ctx->blocks[i].next_use_end =
+ ralloc_array(ctx, unsigned, ctx->live->definitions_count);
+
+ for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
+ ctx->blocks[i].next_use_start[j] = UINT_MAX;
+ ctx->blocks[i].next_use_end[j] = UINT_MAX;
+ }
+ }
+
+ foreach_block (block, &ir->block_list) {
+ struct ra_spill_block_state *state = &ctx->blocks[block->index];
+ state->cycles = 0;
+ foreach_instr (instr, &block->instr_list) {
+ state->cycles += instr_cycles(instr);
+ foreach_dst (dst, instr) {
+ dst->spill_slot = ~0;
+ }
+ }
+ }
+
+ unsigned *tmp_next_use =
+ ralloc_array(ctx, unsigned, ctx->live->definitions_count);
+
+ bool progress = true;
+ while (progress) {
+ progress = false;
+ foreach_block_rev (block, &ir->block_list) {
+ progress |= compute_block_next_distance(ctx, block, tmp_next_use);
+ }
+ }
}
static void
-ra_pressure_sub(struct ir3_pressure *pressure,
- struct ra_spill_interval *interval)
+ra_spill_interval_init(struct ra_spill_interval *interval,
+ struct ir3_register *reg)
{
- unsigned size = reg_size(interval->interval.reg);
- if (interval->interval.reg->flags & IR3_REG_SHARED)
- pressure->shared -= size;
- else if (interval->interval.reg->flags & IR3_REG_HALF)
- pressure->half -= size;
- else
- pressure->full -= size;
+ ir3_reg_interval_init(&interval->interval, reg);
+ interval->dst.flags = reg->flags;
+ interval->dst.def = reg;
+ interval->already_spilled = false;
+ interval->needs_reload = false;
+ interval->cant_spill = false;
}
static struct ra_spill_interval *
return rb_node_data(struct ra_spill_interval, interval, interval);
}
+static struct ra_spill_interval *
+ra_spill_interval_root(struct ra_spill_interval *interval)
+{
+ struct ir3_reg_interval *ir3_interval = &interval->interval;
+ while (ir3_interval->parent)
+ ir3_interval = ir3_interval->parent;
+ return ir3_reg_interval_to_interval(ir3_interval);
+}
+
static struct ra_spill_ctx *
ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
{
return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
}
+static int
+ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
+{
+ const struct ra_spill_interval *a =
+ rb_node_data(const struct ra_spill_interval, _a, node);
+ const struct ra_spill_interval *b =
+ rb_node_data(const struct ra_spill_interval, _b, node);
+ return a->next_use_distance - b->next_use_distance;
+}
+
+static int
+ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
+{
+ const struct ra_spill_interval *a =
+ rb_node_data(const struct ra_spill_interval, _a, half_node);
+ const struct ra_spill_interval *b =
+ rb_node_data(const struct ra_spill_interval, _b, half_node);
+ return a->next_use_distance - b->next_use_distance;
+}
+
static void
interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
{
struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
- ra_pressure_add(&ctx->cur_pressure, interval);
+ unsigned size = reg_size(interval->interval.reg);
+ if (interval->interval.reg->flags & IR3_REG_SHARED) {
+ ctx->cur_pressure.shared += size;
+ } else {
+ if (interval->interval.reg->flags & IR3_REG_HALF) {
+ ctx->cur_pressure.half += size;
+ if (ctx->spilling) {
+ rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
+ ra_spill_interval_half_cmp);
+ }
+ }
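+      /* With a merged half/full register file, half registers also take up
+       * space in the full file, so count them against the full budget too.
+       */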
+ if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
+ ctx->cur_pressure.full += size;
+ if (ctx->spilling) {
+ rb_tree_insert(&ctx->full_live_intervals, &interval->node,
+ ra_spill_interval_cmp);
+ }
+ }
+ }
}
static void
struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
- ra_pressure_sub(&ctx->cur_pressure, interval);
+ unsigned size = reg_size(interval->interval.reg);
+ if (interval->interval.reg->flags & IR3_REG_SHARED) {
+ ctx->cur_pressure.shared -= size;
+ } else {
+ if (interval->interval.reg->flags & IR3_REG_HALF) {
+ ctx->cur_pressure.half -= size;
+ if (ctx->spilling) {
+ rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
+ }
+ }
+ if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
+ ctx->cur_pressure.full -= size;
+ if (ctx->spilling) {
+ rb_tree_remove(&ctx->full_live_intervals, &interval->node);
+ }
+ }
+ }
}
static void
}
static void
-spill_ctx_init(struct ra_spill_ctx *ctx)
+spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
+ struct ir3_liveness *live)
{
+ ctx->live = live;
+ ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
+ ctx->live->definitions_count);
+ struct ra_spill_interval *intervals =
+ rzalloc_array(ctx, struct ra_spill_interval,
+ ctx->live->definitions_count);
+ for (unsigned i = 0; i < ctx->live->definitions_count; i++)
+ ctx->intervals[i] = &intervals[i];
+
+ ctx->intervals_count = ctx->live->definitions_count;
+ ctx->compiler = v->shader->compiler;
+ ctx->merged_regs = v->mergedregs;
+
rb_tree_init(&ctx->reg_ctx.intervals);
ctx->reg_ctx.interval_add = interval_add;
ctx->reg_ctx.interval_delete = interval_delete;
static void
init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
- struct ra_spill_interval *interval = &ctx->intervals[dst->name];
+ struct ra_spill_interval *interval = ctx->intervals[dst->name];
ra_spill_interval_init(interval, dst);
+ if (ctx->spilling)
+ interval->next_use_distance = dst->next_use;
}
static void
insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
- struct ra_spill_interval *interval = &ctx->intervals[dst->name];
+ struct ra_spill_interval *interval = ctx->intervals[dst->name];
if (interval->interval.inserted)
return;
ra_spill_ctx_insert(ctx, interval);
+ interval->cant_spill = true;
/* For precolored inputs, make sure we leave enough registers to allow for
* holes in the inputs. It can happen that the binning shader has a lower
}
static void
-remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
- struct ir3_register *src)
+insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
{
- if (!(src->flags & IR3_REG_FIRST_KILL))
+ struct ra_spill_interval *interval = ctx->intervals[src->def->name];
+
+ ra_spill_interval_root(interval)->cant_spill = true;
+
+ if (interval->interval.inserted)
return;
- struct ra_spill_interval *interval = &ctx->intervals[src->def->name];
+ ra_spill_ctx_insert(ctx, interval);
+ interval->needs_reload = true;
+ interval->already_spilled = true;
+}
+
+static void
+remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
+ struct ir3_register *src)
+{
+ struct ra_spill_interval *interval = ctx->intervals[src->def->name];
if (!interval->interval.inserted || interval->interval.parent ||
!rb_tree_is_empty(&interval->interval.children))
remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *src)
{
- if (!(src->flags & IR3_REG_FIRST_KILL))
- return;
-
- struct ra_spill_interval *interval = &ctx->intervals[src->def->name];
+ struct ra_spill_interval *interval = ctx->intervals[src->def->name];
if (!interval->interval.inserted)
return;
ra_spill_ctx_remove(ctx, interval);
}
+static void
+finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
+{
+ struct ra_spill_interval *interval = ctx->intervals[dst->name];
+ interval->cant_spill = false;
+}
+
static void
remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
{
- struct ra_spill_interval *interval = &ctx->intervals[dst->name];
+ struct ra_spill_interval *interval = ctx->intervals[dst->name];
if (!interval->interval.inserted)
return;
ra_spill_ctx_remove(ctx, interval);
}
+static void
+update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
+{
+ struct ra_spill_interval *interval = ctx->intervals[src->def->name];
+
+ assert(interval->interval.inserted);
+
+ interval->next_use_distance = src->next_use;
+
+ /* If this node is inserted in one of the trees, then it needs to be resorted
+ * as its key has changed.
+ */
+ if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
+ if (src->flags & IR3_REG_HALF) {
+ rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
+ rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
+ ra_spill_interval_half_cmp);
+ }
+ if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
+ rb_tree_remove(&ctx->full_live_intervals, &interval->node);
+ rb_tree_insert(&ctx->full_live_intervals, &interval->node,
+ ra_spill_interval_cmp);
+ }
+ }
+}
+
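+/* Allocate (or reuse) the pvtmem slot for a def. Spill slots are byte
+ * offsets, while merge-set offsets and reg_size() are counted in
+ * half-register units, hence the * 2 conversions below.
+ */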
+static unsigned
+get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
+{
+ if (reg->merge_set) {
+ if (reg->merge_set->spill_slot == ~0) {
+ reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
+ reg->merge_set->alignment);
+ ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
+ }
+ return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
+ } else {
+ if (reg->spill_slot == ~0) {
+ reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
+ ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
+ }
+ return reg->spill_slot;
+ }
+}
+
+static void
+set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
+{
+ if (val->flags & IR3_REG_IMMED) {
+ src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
+ src->uim_val = val->uimm;
+ src->def = NULL;
+ } else if (val->flags & IR3_REG_CONST) {
+ src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
+ src->num = val->const_num;
+ src->def = NULL;
+ } else {
+ src->def = val->def;
+ }
+}
+
+static struct ir3_register *
+materialize_pcopy_src(const struct reg_or_immed *src,
+ struct ir3_instruction *instr,
+ struct ir3_block *block)
+{
+ struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
+ struct ir3_register *dst = __ssa_dst(mov);
+ dst->flags |= src->flags & IR3_REG_HALF;
+ struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
+ set_src_val(mov_src, src);
+ mov->cat1.src_type = mov->cat1.dst_type =
+ (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
+
+ if (instr)
+ ir3_instr_move_before(mov, instr);
+ return dst;
+}
+
+static void
+spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
+ unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block)
+{
+ struct ir3_register *reg;
+
+ /* If spilling an immed/const pcopy src, we need to actually materialize it
+ * first with a mov.
+ */
+ if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
+ reg = materialize_pcopy_src(val, instr, block);
+ } else {
+ reg = val->def;
+ }
+
+ d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
+ spill_slot);
+
+ unsigned elems = reg_elems(reg);
+ struct ir3_instruction *spill =
+ ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
+ ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
+ unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
+ IR3_REG_CONST | IR3_REG_SSA |
+ IR3_REG_ARRAY);
+ struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
+ ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
+ spill->cat6.dst_offset = spill_slot;
+ spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
+
+ src->def = reg;
+ if (reg->flags & IR3_REG_ARRAY) {
+ src->size = reg->size;
+ src->array.id = reg->array.id;
+ src->array.offset = 0;
+ } else {
+ src->wrmask = reg->wrmask;
+ }
+
+ if (instr)
+ ir3_instr_move_before(spill, instr);
+}
+
+static void
+spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
+ struct ir3_instruction *instr, struct ir3_block *block)
+{
+ spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
+ instr, block);
+}
+
+/* This is similar to "limit" in the paper. */
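+/* Kick out spillable live intervals, spilling each one that hasn't already
+ * been spilled, until the current pressure fits within the limit RA gave us.
+ * Intervals marked cant_spill (the current instruction's sources/dests) are
+ * skipped.
+ */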
+static void
+limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
+{
+ if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
+ d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
+ ctx->limit_pressure.half);
+ rb_tree_foreach_safe (struct ra_spill_interval, interval,
+ &ctx->half_live_intervals, half_node) {
+ d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
+ interval->interval.reg->name);
+ if (!interval->cant_spill) {
+ if (!interval->already_spilled)
+ spill_interval(ctx, interval, instr, instr->block);
+ ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
+ if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
+ break;
+ }
+ }
+
+ assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
+ }
+
+ if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
+ d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
+ ctx->limit_pressure.full);
+ rb_tree_foreach_safe (struct ra_spill_interval, interval,
+ &ctx->full_live_intervals, node) {
+ d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
+ interval->interval.reg->name);
+ if (!interval->cant_spill) {
+ if (!interval->already_spilled)
+ spill_interval(ctx, interval, instr, instr->block);
+ ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
+ if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
+ break;
+ } else {
+ d("can't spill");
+ }
+ }
+
+ assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
+ }
+}
+
+/* There's a corner case where we reload a value which has overlapping live
+ * values already reloaded, either because it's the child of some other interval
+ * that was already reloaded or some of its children have already been
+ * reloaded. Because RA only expects overlapping source/dest intervals for meta
+ * instructions (split/collect), and we don't want to add register pressure by
+ * creating an entirely separate value, we need to add splits and collects to
+ * deal with this case. These splits/collects also have to have correct merge
+ * set information, so that they don't result in any actual code or register
+ * pressure in practice.
+ */
+
+static void
+add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
+ unsigned offset)
+{
+ def->merge_set = set;
+ def->merge_set_offset = offset;
+ def->interval_start = set->interval_start + offset;
+ def->interval_end = set->interval_start + offset + reg_size(def);
+}
+
+static struct ir3_register *
+split(struct ir3_register *def, unsigned offset,
+ struct ir3_instruction *after, struct ir3_block *block)
+{
+ if (reg_elems(def) == 1) {
+ assert(offset == 0);
+ return def;
+ }
+
+ assert(!(def->flags & IR3_REG_ARRAY));
+ assert(def->merge_set);
+ struct ir3_instruction *split =
+ ir3_instr_create(after->block, OPC_META_SPLIT, 1, 1);
+ struct ir3_register *dst = __ssa_dst(split);
+ dst->flags |= def->flags & IR3_REG_HALF;
+ struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
+ src->wrmask = def->wrmask;
+ src->def = def;
+ add_to_merge_set(def->merge_set, dst,
+ def->merge_set_offset + offset * reg_elem_size(def));
+ if (after)
+ ir3_instr_move_before(split, after);
+ return dst;
+}
+
+static struct ir3_register *
+extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
+ struct ir3_instruction *after, struct ir3_block *block)
+{
+ if (offset == 0 && elems == reg_elems(parent_def))
+ return parent_def;
+
+ struct ir3_instruction *collect =
+ ir3_instr_create(after->block, OPC_META_COLLECT, 1, elems);
+ struct ir3_register *dst = __ssa_dst(collect);
+ dst->flags |= parent_def->flags & IR3_REG_HALF;
+ dst->wrmask = MASK(elems);
+ add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);
+
+ for (unsigned i = 0; i < elems; i++) {
+ ir3_src_create(collect, INVALID_REG, parent_def->flags)->def =
+ split(parent_def, offset + i, after, block);
+ }
+
+ if (after)
+ ir3_instr_move_before(collect, after);
+ return dst;
+}
+
+static struct ir3_register *
+reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
+ struct ir3_instruction *after, struct ir3_block *block)
+{
+ unsigned spill_slot = get_spill_slot(ctx, reg);
+
+ d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
+ spill_slot);
+
+ unsigned elems = reg_elems(reg);
+ struct ir3_instruction *reload =
+ ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
+ struct ir3_register *dst = __ssa_dst(reload);
+ dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
+ ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
+ struct ir3_register *offset_reg =
+ ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
+ offset_reg->uim_val = spill_slot;
+ ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
+ reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
+
+ if (reg->flags & IR3_REG_ARRAY) {
+ dst->array.offset = 0;
+ dst->array.id = reg->array.id;
+ dst->size = reg->size;
+ } else {
+ dst->wrmask = MASK(elems);
+ }
+
+ dst->merge_set = reg->merge_set;
+ dst->merge_set_offset = reg->merge_set_offset;
+ dst->interval_start = reg->interval_start;
+ dst->interval_end = reg->interval_end;
+
+ if (after)
+ ir3_instr_move_before(reload, after);
+
+ return dst;
+}
+
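+/* After reloading a def, point its interval at the new value and recursively
+ * re-extract any child intervals from it so that overlapping live values stay
+ * expressed through split/collect instructions.
+ */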
+static void
+rewrite_src_interval(struct ra_spill_ctx *ctx,
+ struct ra_spill_interval *interval,
+ struct ir3_register *def,
+ struct ir3_instruction *instr,
+ struct ir3_block *block)
+{
+ interval->dst.flags = def->flags;
+ interval->dst.def = def;
+ interval->needs_reload = false;
+
+ rb_tree_foreach (struct ra_spill_interval, child,
+ &interval->interval.children, interval.node) {
+ struct ir3_register *child_reg = child->interval.reg;
+ struct ir3_register *child_def =
+ extract(def, (child_reg->interval_start -
+ interval->interval.reg->interval_start) / reg_elem_size(def),
+ reg_elems(child_reg), instr, block);
+ rewrite_src_interval(ctx, child, child_def, instr, block);
+ }
+}
+
+static void
+reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_instruction *instr, struct ir3_block *block)
+{
+ unsigned elems = reg_elems(def);
+ struct ra_spill_interval *interval = ctx->intervals[def->name];
+
+ struct ir3_reg_interval *ir3_parent = interval->interval.parent;
+
+ if (ir3_parent) {
+ struct ra_spill_interval *parent =
+ ir3_reg_interval_to_interval(ir3_parent);
+ if (!parent->needs_reload) {
+ interval->dst.flags = def->flags;
+ interval->dst.def = extract(
+ parent->dst.def, (def->interval_start - parent->dst.def->interval_start) /
+ reg_elem_size(def), elems, instr, block);
+ return;
+ }
+ }
+
+ struct ir3_register *dst = reload(ctx, def, instr, block);
+
+ rewrite_src_interval(ctx, interval, dst, instr, block);
+}
+
+static void
+reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
+ struct ir3_register *src)
+{
+ struct ra_spill_interval *interval = ctx->intervals[src->def->name];
+
+ if (interval->needs_reload) {
+ reload_def(ctx, src->def, instr, instr->block);
+ }
+
+ ra_spill_interval_root(interval)->cant_spill = false;
+}
+
+static void
+rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
+ struct ir3_register *src)
+{
+ struct ra_spill_interval *interval = ctx->intervals[src->def->name];
+
+ set_src_val(src, &interval->dst);
+}
+
static void
update_max_pressure(struct ra_spill_ctx *ctx)
{
static void
handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
{
- di(instr, "processing");
-
ra_foreach_dst (dst, instr) {
init_dst(ctx, dst);
}
+ if (ctx->spilling) {
+ ra_foreach_src (src, instr)
+ insert_src(ctx, src);
+ }
+
/* Handle tied destinations. If a destination is tied to a source and that
* source is live-through, then we need to allocate a new register for the
* destination which is live-through itself and cannot overlap the
insert_dst(ctx, dst);
}
- update_max_pressure(ctx);
+ if (ctx->spilling)
+ limit(ctx, instr);
+ else
+ update_max_pressure(ctx);
+
+ if (ctx->spilling) {
+ ra_foreach_src (src, instr) {
+ reload_src(ctx, instr, src);
+ update_src_next_use(ctx, src);
+ }
+ }
ra_foreach_src (src, instr) {
if (src->flags & IR3_REG_FIRST_KILL)
insert_dst(ctx, dst);
}
- update_max_pressure(ctx);
+ if (ctx->spilling)
+ limit(ctx, instr);
+ else
+ update_max_pressure(ctx);
+
+   /* We have to remove sources before rewriting them so that we can look up
+    * the interval to remove before the source itself is changed.
+ */
+ ra_foreach_src (src, instr) {
+ if (src->flags & IR3_REG_FIRST_KILL)
+ remove_src(ctx, instr, src);
+ }
+
+ if (ctx->spilling) {
+ ra_foreach_src (src, instr) {
+ rewrite_src(ctx, instr, src);
+ }
+ }
- for (unsigned i = 0; i < instr->srcs_count; i++) {
- if (ra_reg_is_src(instr->srcs[i]) &&
- (instr->srcs[i]->flags & IR3_REG_FIRST_KILL))
- remove_src(ctx, instr, instr->srcs[i]);
+ ra_foreach_dst (dst, instr) {
+ finish_dst(ctx, dst);
}
+
for (unsigned i = 0; i < instr->dsts_count; i++) {
if (ra_reg_is_dst(instr->dsts[i]) &&
(instr->dsts[i]->flags & IR3_REG_UNUSED))
}
}
-static void
-handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
+static struct ra_spill_interval *
+create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
{
- init_dst(ctx, instr->dsts[0]);
- insert_dst(ctx, instr->dsts[0]);
-}
+ unsigned name = ctx->intervals_count++;
+ unsigned offset = ctx->live->interval_offset;
-static void
-remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
-{
- ra_foreach_src (src, instr)
- remove_src(ctx, instr, src);
- if (instr->dsts[0]->flags & IR3_REG_UNUSED)
- remove_dst(ctx, instr->dsts[0]);
+ /* This is kinda hacky, but we need to create a fake SSA def here that is
+ * only used as part of the pcopy accounting. See below.
+ */
+ struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
+ *reg = *def;
+ reg->name = name;
+ reg->interval_start = offset;
+ reg->interval_end = offset + reg_size(def);
+ reg->merge_set = NULL;
+
+ ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
+ ctx->intervals_count);
+ struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
+ ra_spill_interval_init(interval, reg);
+ ctx->intervals[name] = interval;
+ ctx->live->interval_offset += reg_size(def);
+ return interval;
}
-static void
-handle_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def)
+/* In the sequence of copies generated (see below), would this source be killed?
+ */
+static bool
+is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
{
- struct ra_spill_interval *interval = &ctx->intervals[def->name];
- ra_spill_interval_init(interval, def);
- insert_dst(ctx, def);
+ struct ir3_register *src = pcopy->srcs[src_n];
+ if (!(src->flags & IR3_REG_KILL))
+ return false;
+ for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
+ if (pcopy->srcs[j]->def == src->def)
+ return false;
+ }
+ return true;
}
-static void
-handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
-{
+/* Parallel copies are different from normal instructions. The sources together
+ * may be larger than the entire register file, so we cannot just reload every
+ * source like normal, and indeed that probably wouldn't be a great idea.
+ * Instead we essentially need to lower the parallel copy to "copies," just like
+ * in the normal CSSA construction, although we implement the copies by
+ * reloading and then possibly spilling values. We essentially just shuffle
+ * around the sources until each source either (a) is live or (b) has the same
+ * spill slot as its corresponding destination. We do this by decomposing the
+ * copy into a series of copies, so:
+ *
+ * a, b, c = d, e, f
+ *
+ * becomes:
+ *
+ * d' = d
+ * e' = e
+ * f' = f
+ * a = d'
+ * b = e'
+ * c = f'
+ *
+ * The temporary SSA values d', e', and f' never actually show up in the result.
+ * They are only used for our internal accounting. They may, however, have their
+ * own spill slot created for them. Similarly, we don't actually emit any copy
+ * instructions, although we emit the spills/reloads that *would've* been
+ * required if those copies were there.
+ *
+ * TODO: in order to reduce the number of temporaries and therefore spill slots,
+ * we could instead do a more complicated analysis that considers the location
+ * transfer graph.
+ *
+ * In addition, we actually remove the parallel copy and rewrite all its uses
+ * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
+ * later turns out to be easier than keeping it up-to-date throughout this pass,
+ * since we may have to remove entries for phi sources that are spilled and add
+ * entries for live-outs that are spilled and reloaded, which can happen here
+ * and then possibly be undone or done again when processing live-ins of the
+ * successor block.
+ */
+
+static void
+handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
+{
+ foreach_dst (dst, pcopy) {
+ struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
+ ra_spill_interval_init(dst_interval, dst);
+ }
+
+ foreach_src_n (src, i, pcopy) {
+ d("processing src %u", i);
+ struct ir3_register *dst = pcopy->dsts[i];
+
+ /* Skip the intermediate copy for cases where the source is merged with
+ * the destination. Crucially this means that we also don't reload/spill
+ * it if it's been spilled, because it shares the same spill slot.
+ */
+ if (src->def && src->def->merge_set &&
+ src->def->merge_set == dst->merge_set &&
+ src->def->merge_set_offset == dst->merge_set_offset) {
+ struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
+ struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
+ if (src_interval->interval.inserted) {
+ update_src_next_use(ctx, src);
+ if (is_last_pcopy_src(pcopy, i))
+ ra_spill_ctx_remove(ctx, src_interval);
+ dst_interval->cant_spill = true;
+ ra_spill_ctx_insert(ctx, dst_interval);
+ limit(ctx, pcopy);
+ dst_interval->cant_spill = false;
+ dst_interval->dst = src_interval->dst;
+ }
+ } else if (src->def) {
+ struct ra_spill_interval *temp_interval =
+ create_temp_interval(ctx, dst);
+ struct ir3_register *temp = temp_interval->interval.reg;
+ temp_interval->next_use_distance = src->next_use;
+
+ insert_src(ctx, src);
+ limit(ctx, pcopy);
+ reload_src(ctx, pcopy, src);
+ update_src_next_use(ctx, src);
+ if (is_last_pcopy_src(pcopy, i))
+ remove_src(ctx, pcopy, src);
+ struct ra_spill_interval *src_interval =
+ ctx->intervals[src->def->name];
+ temp_interval->dst = src_interval->dst;
+
+ temp_interval->cant_spill = true;
+ ra_spill_ctx_insert(ctx, temp_interval);
+ limit(ctx, pcopy);
+ temp_interval->cant_spill = false;
+
+ src->flags = temp->flags;
+ src->def = temp;
+ }
+ }
+
+ d("done with pcopy srcs");
+
+ foreach_src_n (src, i, pcopy) {
+ struct ir3_register *dst = pcopy->dsts[i];
+
+ if (src->def && src->def->merge_set &&
+ src->def->merge_set == dst->merge_set &&
+ src->def->merge_set_offset == dst->merge_set_offset)
+ continue;
+
+ struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
+
+ if (!src->def) {
+ dst_interval->cant_spill = true;
+ ra_spill_ctx_insert(ctx, dst_interval);
+ limit(ctx, pcopy);
+ dst_interval->cant_spill = false;
+
+ assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
+ if (src->flags & IR3_REG_CONST) {
+ dst_interval->dst.flags = src->flags;
+ dst_interval->dst.const_num = src->num;
+ } else {
+ dst_interval->dst.flags = src->flags;
+ dst_interval->dst.uimm = src->uim_val;
+ }
+ } else {
+ struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];
+
+ insert_src(ctx, src);
+ limit(ctx, pcopy);
+ reload_src(ctx, pcopy, src);
+ remove_src(ctx, pcopy, src);
+
+ dst_interval->dst = temp_interval->dst;
+ ra_spill_ctx_insert(ctx, dst_interval);
+ }
+ }
+
+ pcopy->flags |= IR3_INSTR_UNUSED;
+}
+
+static void
+handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
+{
+ init_dst(ctx, instr->dsts[0]);
+ insert_dst(ctx, instr->dsts[0]);
+ finish_dst(ctx, instr->dsts[0]);
+}
+
+static void
+remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
+{
+ if (instr->opc == OPC_META_TEX_PREFETCH) {
+ ra_foreach_src (src, instr)
+ remove_src(ctx, instr, src);
+ }
+ if (instr->dsts[0]->flags & IR3_REG_UNUSED)
+ remove_dst(ctx, instr->dsts[0]);
+}
+
+static void
+handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
+ struct ir3_register *def)
+{
+ struct ra_spill_interval *interval = ctx->intervals[def->name];
+ ra_spill_interval_init(interval, def);
+ if (ctx->spilling) {
+ interval->next_use_distance =
+ ctx->blocks[block->index].next_use_start[def->name];
+ }
+
+ ra_spill_ctx_insert(ctx, interval);
+}
+
+static bool
+is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
+{
+ return def->instr->opc == OPC_META_PHI && def->instr->block == block;
+}
+
+static bool
+is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block, unsigned pred_idx)
+{
+ struct ir3_block *pred = block->predecessors[pred_idx];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+ if (is_live_in_phi(def, block)) {
+ def = def->instr->srcs[pred_idx]->def;
+ if (!def)
+ return false;
+ }
+
+ return _mesa_hash_table_search(state->remap, def);
+}
+
+static bool
+is_live_in_undef(struct ir3_register *def,
+ struct ir3_block *block, unsigned pred_idx)
+{
+ if (!is_live_in_phi(def, block))
+ return false;
+
+ return !def->instr->srcs[pred_idx]->def;
+}
+
+static struct reg_or_immed *
+read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block, unsigned pred_idx)
+{
+ struct ir3_block *pred = block->predecessors[pred_idx];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+
+ if (is_live_in_phi(def, block)) {
+ def = def->instr->srcs[pred_idx]->def;
+ if (!def)
+ return NULL;
+ }
+
+ struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
+ if (entry)
+ return entry->data;
+ else
+ return NULL;
+}
+
+static bool
+is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block)
+{
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ if (!is_live_in_pred(ctx, def, block, i))
+ return false;
+ }
+
+ return true;
+}
+
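+/* Spill a live-in value in each already-visited predecessor where it (or the
+ * corresponding phi source) is still available, using the value recorded in
+ * that predecessor's remap table.
+ */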
+static void
+spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block)
+{
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+
+ if (!state->visited)
+ continue;
+
+ struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
+ if (pred_def) {
+ spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
+ }
+ }
+}
+
+static void
+spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ bool all_preds_visited = true;
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+ if (!state->visited) {
+ all_preds_visited = false;
+ break;
+ }
+ }
+
+ /* Note: the paper spills live-through values explicitly first, but we should
+ * get that behavior automatically by always picking the value with the
+ * largest next-use distance, since edges out of loops add extra distance.
+ *
+ * TODO: Keep track of pressure in each block and preemptively spill
+ * live-through values as described in the paper to avoid spilling them
+ * inside the loop.
+ */
+
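+ /* Spill live-ins (half registers first, then full) until the block's
+ * starting pressure fits under the limit. Once every predecessor has been
+ * visited, prefer to spill values that aren't available in every
+ * predecessor, since keeping them live-in would force a reload on the
+ * missing edges anyway.
+ */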
+ if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
+ rb_tree_foreach_safe (struct ra_spill_interval, interval,
+ &ctx->half_live_intervals, half_node) {
+ if (all_preds_visited &&
+ is_live_in_all_preds(ctx, interval->interval.reg, block))
+ continue;
+ spill_live_in(ctx, interval->interval.reg, block);
+ ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
+ if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
+ break;
+ }
+ }
+
+ if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
+ rb_tree_foreach_safe (struct ra_spill_interval, interval,
+ &ctx->full_live_intervals, node) {
+ if (all_preds_visited &&
+ is_live_in_all_preds(ctx, interval->interval.reg, block))
+ continue;
+ spill_live_in(ctx, interval->interval.reg, block);
+ ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
+ if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
+ break;
+ }
+ }
+}
+
+static void
+live_in_rewrite(struct ra_spill_ctx *ctx,
+ struct ra_spill_interval *interval,
+ struct reg_or_immed *new_val,
+ struct ir3_block *block, unsigned pred_idx)
+{
+ struct ir3_block *pred = block->predecessors[pred_idx];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+ struct ir3_register *def = interval->interval.reg;
+ if (is_live_in_phi(def, block)) {
+ def = def->instr->srcs[pred_idx]->def;
+ }
+
+ if (def)
+ _mesa_hash_table_insert(state->remap, def, new_val);
+
+ rb_tree_foreach (struct ra_spill_interval, child,
+ &interval->interval.children, interval.node) {
+ assert(new_val->flags & IR3_REG_SSA);
+ struct ir3_register *child_def =
+ extract(new_val->def,
+ (child->interval.reg->interval_start - def->interval_start) /
+ reg_elem_size(def), reg_elems(child->interval.reg),
+ NULL, pred);
+ struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
+ child_val->def = child_def;
+ child_val->flags = child_def->flags;
+ live_in_rewrite(ctx, child, child_val, block, pred_idx);
+ }
+}
+
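+/* Make a live-in value available on entry: for each visited predecessor, look
+ * up where the value lives at the end of that predecessor, reloading it there
+ * if it had been spilled, and record the result in the predecessor's remap so
+ * that phi sources and uses can be rewritten later.
+ */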
+static void
+reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block)
+{
+ struct ra_spill_interval *interval = ctx->intervals[def->name];
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+ if (!state->visited)
+ continue;
+
+ if (is_live_in_undef(def, block, i))
+ continue;
+
+ struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);
+
+ if (!new_val) {
+ new_val = ralloc(ctx, struct reg_or_immed);
+ new_val->def = reload(ctx, def, NULL, pred);
+ new_val->flags = new_val->def->flags;
+ }
+ live_in_rewrite(ctx, interval, new_val, block, i);
+ }
+}
+
+static void
+reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
+ interval.node) {
+ reload_live_in(ctx, interval->interval.reg, block);
+ }
+}
+
+static void
+add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block)
+{
+ struct ra_spill_interval *interval = ctx->intervals[def->name];
+ if (!interval->interval.inserted)
+ return;
+
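+ /* A phi is needed if any predecessor hasn't been visited yet (a loop
+ * back-edge) or if the predecessors disagree about where the value lives
+ * (different defs, or a constant/immediate).
+ */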
+ bool needs_phi = false;
+ struct ir3_register *cur_def = NULL;
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+
+ if (!state->visited) {
+ needs_phi = true;
+ break;
+ }
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(state->remap, def);
+ assert(entry);
+ struct reg_or_immed *pred_val = entry->data;
+ if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
+ !pred_val->def ||
+ (cur_def && cur_def != pred_val->def)) {
+ needs_phi = true;
+ break;
+ }
+ cur_def = pred_val->def;
+ }
+
+ if (!needs_phi) {
+ interval->dst.def = cur_def;
+ interval->dst.flags = cur_def->flags;
+ return;
+ }
+
+ struct ir3_instruction *phi =
+ ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
+ struct ir3_register *dst = __ssa_dst(phi);
+ dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
+ dst->size = def->size;
+ dst->wrmask = def->wrmask;
+
+ dst->interval_start = def->interval_start;
+ dst->interval_end = def->interval_end;
+ dst->merge_set = def->merge_set;
+ dst->merge_set_offset = def->merge_set_offset;
+
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+ struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
+ src->size = def->size;
+ src->wrmask = def->wrmask;
+
+ if (state->visited) {
+ struct hash_entry *entry =
+ _mesa_hash_table_search(state->remap, def);
+ assert(entry);
+ struct reg_or_immed *new_val = entry->data;
+ set_src_val(src, new_val);
+ } else {
+ src->def = def;
+ }
+ }
+
+ interval->dst.def = dst;
+ interval->dst.flags = dst->flags;
+
+ ir3_instr_move_before_block(phi, block);
+}
+
+/* When spilling a block with a single predecessor, the predecessor may have
+ * other successors, so we can't choose what's live in and we can't
+ * spill/restore anything around the edge. Just make the inserted intervals
+ * exactly match the predecessor: if a value wasn't live at the end of the
+ * predecessor, it must already have been spilled. Also, a block with a single
+ * predecessor has no phi nodes, so there is no other live-in handling to do.
+ */
+static void
+spill_single_pred_live_in(struct ra_spill_ctx *ctx,
+ struct ir3_block *block)
+{
+ unsigned name;
+ BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
+ ctx->live->definitions_count) {
+ struct ir3_register *reg = ctx->live->definitions[name];
+ struct ra_spill_interval *interval = ctx->intervals[reg->name];
+ struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
+ if (val)
+ interval->dst = *val;
+ else
+ ra_spill_ctx_remove(ctx, interval);
+ }
+}
+
+static void
+rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
+ struct ir3_block *block)
+{
+ if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
+ phi->flags |= IR3_INSTR_UNUSED;
+ return;
+ }
+
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+
+ if (!state->visited)
+ continue;
+
+ struct ir3_register *src = phi->srcs[i];
+ if (!src->def)
+ continue;
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(state->remap, src->def);
+ assert(entry);
+ struct reg_or_immed *new_val = entry->data;
+ set_src_val(src, new_val);
+ }
+}
+
+static void
+spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
+ struct ir3_block *block)
+{
+ struct ir3_register *def = interval->interval.reg;
+
+ spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
+ ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
+}
+
+static void
+spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ struct ra_spill_block_state *state = &ctx->blocks[block->index];
+ rb_tree_foreach_safe (struct ra_spill_interval, interval,
+ &ctx->reg_ctx.intervals, interval.node) {
+ if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
+ spill_live_out(ctx, interval, block);
+ }
+ }
+}
+
+static void
+reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
+ struct ir3_block *block)
+{
+ struct ra_spill_interval *interval = ctx->intervals[def->name];
+ ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
+
+ reload_def(ctx, def, NULL, block);
+}
+
+static void
+reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ struct ra_spill_block_state *state = &ctx->blocks[block->index];
+ unsigned name;
+ BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
+ struct ir3_register *reg = ctx->live->definitions[name];
+ struct ra_spill_interval *interval = ctx->intervals[name];
+ if (!interval->interval.inserted)
+ reload_live_out(ctx, reg, block);
+ }
+}
+
+static void
+update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ assert(!block->successors[1]);
+ struct ir3_block *succ = block->successors[0];
+ unsigned pred_idx = ir3_block_get_pred_index(succ, block);
+
+ foreach_instr (instr, &succ->instr_list) {
+ if (instr->opc != OPC_META_PHI)
+ break;
+
+ struct ir3_register *def = instr->srcs[pred_idx]->def;
+ if (!def)
+ continue;
+
+ struct ra_spill_interval *interval = ctx->intervals[def->name];
+ if (!interval->interval.inserted)
+ continue;
+ set_src_val(instr->srcs[pred_idx], &interval->dst);
+ }
+}
+
+static void
+record_pred_live_out(struct ra_spill_ctx *ctx,
+ struct ra_spill_interval *interval,
+ struct ir3_block *block, unsigned pred_idx)
+{
+ struct ir3_block *pred = block->predecessors[pred_idx];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+
+ struct ir3_register *def = interval->interval.reg;
+ if (is_live_in_phi(def, block)) {
+ def = def->instr->srcs[pred_idx]->def;
+ }
+ BITSET_SET(state->live_out, def->name);
+
+ rb_tree_foreach (struct ra_spill_interval, child,
+ &interval->interval.children, interval.node) {
+ record_pred_live_out(ctx, child, block, pred_idx);
+ }
+}
+
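+/* For predecessors that haven't been processed yet (loop back-edges), record
+ * which values we've decided should stay in registers across the edge. When
+ * such a predecessor is visited later, spill_live_outs()/reload_live_outs()
+ * use this to make its end-of-block state match.
+ */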
+static void
+record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ for (unsigned i = 0; i < block->predecessors_count; i++) {
+ struct ir3_block *pred = block->predecessors[i];
+ struct ra_spill_block_state *state = &ctx->blocks[pred->index];
+ if (state->visited)
+ continue;
+
+ state->live_out = rzalloc_array(ctx, BITSET_WORD,
+ BITSET_WORDS(ctx->live->definitions_count));
+
+ rb_tree_foreach (struct ra_spill_interval, interval,
+ &ctx->reg_ctx.intervals, interval.node) {
+ record_pred_live_out(ctx, interval, block, i);
+ }
+ }
+}
+
+static void
+record_live_out(struct ra_spill_ctx *ctx,
+ struct ra_spill_block_state *state,
+ struct ra_spill_interval *interval)
+{
+ if (!(interval->dst.flags & IR3_REG_SSA) ||
+ interval->dst.def) {
+ struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
+ *val = interval->dst;
+ _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
+ }
+ rb_tree_foreach (struct ra_spill_interval, child,
+ &interval->interval.children, interval.node) {
+ record_live_out(ctx, state, child);
+ }
+}
+
+static void
+record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
+ struct ra_spill_block_state *state = &ctx->blocks[block->index];
+ state->remap = _mesa_pointer_hash_table_create(ctx);
+
+ rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
+ interval.node) {
+ record_live_out(ctx, state, interval);
+ }
+}
+
+static void
+handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
+{
memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
rb_tree_init(&ctx->reg_ctx.intervals);
+ rb_tree_init(&ctx->full_live_intervals);
+ rb_tree_init(&ctx->half_live_intervals);
unsigned name;
BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
ctx->live->definitions_count) {
struct ir3_register *reg = ctx->live->definitions[name];
- handle_live_in(ctx, reg);
+ handle_live_in(ctx, block, reg);
}
foreach_instr (instr, &block->instr_list) {
handle_input_phi(ctx, instr);
}
- update_max_pressure(ctx);
+ if (ctx->spilling) {
+ if (block->predecessors_count == 1) {
+ spill_single_pred_live_in(ctx, block);
+ } else {
+ spill_live_ins(ctx, block);
+ reload_live_ins(ctx, block);
+ record_pred_live_outs(ctx, block);
+ foreach_instr (instr, &block->instr_list) {
+ if (instr->opc != OPC_META_PHI)
+ break;
+ rewrite_phi(ctx, instr, block);
+ }
+ BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
+ ctx->live->definitions_count) {
+ struct ir3_register *reg = ctx->live->definitions[name];
+ add_live_in_phi(ctx, reg, block);
+ }
+ }
+ } else {
+ update_max_pressure(ctx);
+ }
foreach_instr (instr, &block->instr_list) {
+ di(instr, "processing");
+
if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
instr->opc == OPC_META_TEX_PREFETCH)
remove_input_phi(ctx, instr);
+ else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
+ handle_pcopy(ctx, instr);
+ else if (ctx->spilling && instr->opc == OPC_MOV &&
+ instr->dsts[0] == ctx->base_reg)
+ /* skip */;
else
handle_instr(ctx, instr);
}
+
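+ /* If the (single) successor has already been processed, this is a loop
+ * back-edge: make our live-outs match what the successor expects by
+ * spilling what it doesn't want in registers, reloading what it does, and
+ * then rewriting its phi sources.
+ */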
+ if (ctx->spilling && block->successors[0]) {
+ struct ra_spill_block_state *state =
+ &ctx->blocks[block->successors[0]->index];
+ if (state->visited) {
+ assert(!block->successors[1]);
+
+ spill_live_outs(ctx, block);
+ reload_live_outs(ctx, block);
+ update_live_out_phis(ctx, block);
+ }
+ }
+
+ if (ctx->spilling) {
+ record_live_outs(ctx, block);
+ ctx->blocks[block->index].visited = true;
+ }
+}
+
+static bool
+simplify_phi_node(struct ir3_instruction *phi)
+{
+ struct ir3_register *def = NULL;
+ foreach_src (src, phi) {
+ /* Ignore phi sources which point to the phi itself. */
+ if (src->def == phi->dsts[0])
+ continue;
+ /* If it's undef or it doesn't match the previous sources, bail */
+ if (!src->def || (def && def != src->def))
+ return false;
+ def = src->def;
+ }
+
+ phi->data = def;
+ phi->flags |= IR3_INSTR_UNUSED;
+ return true;
+}
+
+static void
+simplify_phi_srcs(struct ir3_instruction *instr)
+{
+ foreach_src (src, instr) {
+ if (src->def && src->def->instr->opc == OPC_META_PHI) {
+ struct ir3_instruction *phi = src->def->instr;
+ if (phi->data)
+ src->def = phi->data;
+ }
+ }
+}
+
+/* We insert phi nodes for all live-ins of loops in case we need to split the
+ * live range. This pass cleans that up for the case where the live range didn't
+ * actually need to be split.
+ */
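+/* For example, a loop live-in that never actually changes becomes
+ * "x = phi(y, x)" (illustrative names): every non-self source is the same def
+ * y, so simplify_phi_node() marks the phi unused and records y in phi->data,
+ * and the loop below keeps folding uses of x over to y via simplify_phi_srcs()
+ * until nothing changes.
+ */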
+static void
+simplify_phi_nodes(struct ir3 *ir)
+{
+ foreach_block (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ if (instr->opc != OPC_META_PHI)
+ break;
+ instr->data = NULL;
+ }
+ }
+
+ bool progress;
+ do {
+ progress = false;
+ foreach_block (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
+ continue;
+
+ simplify_phi_srcs(instr);
+ }
+
+ for (unsigned i = 0; i < 2; i++) {
+ struct ir3_block *succ = block->successors[i];
+ if (!succ)
+ continue;
+ foreach_instr (instr, &succ->instr_list) {
+ if (instr->opc != OPC_META_PHI)
+ break;
+ if (instr->flags & IR3_INSTR_UNUSED)
+ continue;
+
+ simplify_phi_srcs(instr);
+ progress |= simplify_phi_node(instr);
+ }
+ }
+ }
+ } while (progress);
+}
+
+static void
+unmark_dead(struct ir3 *ir)
+{
+ foreach_block (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ instr->flags &= ~IR3_INSTR_UNUSED;
+ }
+ }
+}
+
+/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
+ * which ones are dead along the way, so there's nothing to compute here.
+ */
+static void
+cleanup_dead(struct ir3 *ir)
+{
+ foreach_block (block, &ir->block_list) {
+ foreach_instr_safe (instr, &block->instr_list) {
+ if (instr->flags & IR3_INSTR_UNUSED)
+ list_delinit(&instr->node);
+ }
+ }
+}
+
+/* Deal with merge sets after spilling. Spilling generally leaves the merge sets
+ * in a mess, and even if we properly cleaned up after ourselves, we would want
+ * to recompute the merge sets afterward anyway. That's because
+ * spilling/reloading can "break up" phi webs and split/collect webs so that
+ * allocating them to the same register no longer gives any benefit. For
+ * example, imagine we have this:
+ *
+ * if (...) {
+ * foo = ...
+ * } else {
+ * bar = ...
+ * }
+ * baz = phi(foo, bar)
+ *
+ * and we spill "baz":
+ *
+ * if (...) {
+ * foo = ...
+ * spill(foo)
+ * } else {
+ * bar = ...
+ * spill(bar)
+ * }
+ * baz = reload()
+ *
+ * now foo, bar, and baz don't have to be allocated to the same register. How
+ * exactly the merge sets change can be complicated, so it's easier just to
+ * recompute them.
+ *
+ * However, there's a wrinkle in this: those same merge sets determine the
+ * register pressure, due to multiple values inhabiting the same register, and
+ * the pressure we tracked while spilling assumed exactly that sharing.
+ * Therefore we need a three-step procedure:
+ *
+ * 1. Drop the original merge sets.
+ * 2. Calculate which values *must* be merged, being careful to only use the
+ * interval information which isn't trashed by spilling, and forcibly merge
+ * them.
+ * 3. Let ir3_merge_regs() finish the job, including recalculating the
+ * intervals.
+ */
+
+static void
+fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
+{
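+ /* Step 1: drop the original merge sets computed before spilling. */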
+ foreach_block (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ ra_foreach_dst (dst, instr) {
+ dst->merge_set = NULL;
+ dst->merge_set_offset = 0;
+ }
+ }
+ }
+
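+ /* Step 2: forcibly merge split/collect values that must share a register:
+ * sources that aren't killed and whose intervals still overlap the
+ * destination's, using only interval information that spilling didn't trash.
+ */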
+ foreach_block (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ if (instr->opc != OPC_META_SPLIT &&
+ instr->opc != OPC_META_COLLECT)
+ continue;
+
+ struct ir3_register *dst = instr->dsts[0];
+ ra_foreach_src (src, instr) {
+ if (!(src->flags & IR3_REG_KILL) &&
+ src->def->interval_start < dst->interval_end &&
+ dst->interval_start < src->def->interval_end) {
+ ir3_force_merge(dst, src->def,
+ src->def->interval_start - dst->interval_start);
+ }
+ }
+ }
+ }
+
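+ /* Step 3: let ir3_merge_regs() finish the job, recomputing the remaining
+ * merge sets and the intervals.
+ */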
+ ir3_merge_regs(live, ir);
}
void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
struct ir3_pressure *max_pressure)
{
- struct ra_spill_ctx ctx = {};
- ctx.live = live;
- ctx.intervals = calloc(live->definitions_count, sizeof(*ctx.intervals));
- ctx.compiler = v->shader->compiler;
- spill_ctx_init(&ctx);
+ struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
+ spill_ctx_init(ctx, v, live);
foreach_block (block, &v->ir->block_list) {
- handle_block(&ctx, block);
+ handle_block(ctx, block);
+ }
+
+ assert(ctx->cur_pressure.full == 0);
+ assert(ctx->cur_pressure.half == 0);
+ assert(ctx->cur_pressure.shared == 0);
+
+ *max_pressure = ctx->max_pressure;
+ ralloc_free(ctx);
+}
+
+bool
+ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
+ struct ir3_liveness **live,
+ const struct ir3_pressure *limit_pressure)
+{
+ struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
+ spill_ctx_init(ctx, v, *live);
+
+ ctx->spilling = true;
+
+ ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
+ ctx->live->block_count);
+ rb_tree_init(&ctx->full_live_intervals);
+ rb_tree_init(&ctx->half_live_intervals);
+
+ ctx->limit_pressure = *limit_pressure;
+ ctx->spill_slot = v->pvtmem_size;
+
+ add_base_reg(ctx, ir);
+ compute_next_distance(ctx, ir);
+
+ unmark_dead(ir);
+
+ foreach_block (block, &ir->block_list) {
+ handle_block(ctx, block);
}
- assert(ctx.cur_pressure.full == 0);
- assert(ctx.cur_pressure.half == 0);
- assert(ctx.cur_pressure.shared == 0);
+ simplify_phi_nodes(ir);
+
+ cleanup_dead(ir);
+
+ ir3_create_parallel_copies(ir);
+
+ /* After this point, we're done mutating the IR. Liveness has been trashed,
+ * so recalculate it. We'll need it for recalculating the merge sets.
+ */
+ ralloc_free(ctx->live);
+ *live = ir3_calc_liveness(v);
+
+ fixup_merge_sets(*live, ir);
- free(ctx.intervals);
+ v->pvtmem_size = ctx->spill_slot;
+ ralloc_free(ctx);
- *max_pressure = ctx.max_pressure;
+ return true;
}