From c8748771bb68951a2921e76ceab61b68fca9417d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 15 Jan 2021 00:35:19 -0600 Subject: [PATCH] nir/lower_io: Support global addresses for UBOs in nir_lower_explicit_io For nir_address_format_64bit_global_32bit_offset and nir_address_format_64bit_bounded_global, we use new intrinsics which take the base address and offset as separate parameters. For bounds-checked access, the bound is also included in the intrinsic. This gives the driver more control over the bounds checking so that UBOs don't suddenly become massively more expensive. Reviewed-by: Kenneth Graunke Reviewed-by: Caio Marcelo de Oliveira Filho Part-of: --- src/compiler/nir/nir_intrinsics.py | 6 ++++++ src/compiler/nir/nir_lower_io.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index db24434..96e22ea 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -937,6 +937,12 @@ load("global", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) # src[] = { address }. load("global_constant", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { base_address, offset }. +load("global_constant_offset", [1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], + [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { base_address, offset, bound }. +load("global_constant_bounded", [1, 1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], + [CAN_ELIMINATE, CAN_REORDER]) # src[] = { address }. load("kernel_input", [1], [BASE, RANGE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE, CAN_REORDER]) # src[] = { offset }. 
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index e051548..f794019 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1251,7 +1251,14 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, case nir_intrinsic_load_deref: switch (mode) { case nir_var_mem_ubo: - op = nir_intrinsic_load_ubo; + if (addr_format == nir_address_format_64bit_global_32bit_offset) + op = nir_intrinsic_load_global_constant_offset; + else if (addr_format == nir_address_format_64bit_bounded_global) + op = nir_intrinsic_load_global_constant_bounded; + else if (addr_format_is_global(addr_format, mode)) + op = nir_intrinsic_load_global_constant; + else + op = nir_intrinsic_load_ubo; break; case nir_var_mem_ssbo: if (addr_format_is_global(addr_format, mode)) @@ -1323,7 +1330,18 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); - if (addr_format_is_global(addr_format, mode)) { + if (op == nir_intrinsic_load_global_constant_offset) { + assert(addr_format == nir_address_format_64bit_global_32bit_offset); + load->src[0] = nir_src_for_ssa( + nir_pack_64_2x32(b, nir_channels(b, addr, 0x3))); + load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3)); + } else if (op == nir_intrinsic_load_global_constant_bounded) { + assert(addr_format == nir_address_format_64bit_bounded_global); + load->src[0] = nir_src_for_ssa( + nir_pack_64_2x32(b, nir_channels(b, addr, 0x3))); + load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3)); + load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2)); + } else if (addr_format_is_global(addr_format, mode)) { load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); } else if (addr_format_is_offset(addr_format, mode)) { assert(addr->num_components == 1); @@ -1372,8 +1390,12 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, assert(bit_size % 8 == 0); nir_ssa_def *result; - 
if (addr_format_needs_bounds_check(addr_format)) { - /* The Vulkan spec for robustBufferAccess gives us quite a few options + if (addr_format_needs_bounds_check(addr_format) && + op != nir_intrinsic_load_global_constant_bounded) { + /* We don't need to bounds-check global_constant_bounded because bounds + * checking is handled by the intrinsic itself. + * + * The Vulkan spec for robustBufferAccess gives us quite a few options * as to what we can do with an OOB read. Unfortunately, returning * undefined values isn't one of them so we return an actual zero. */ -- 2.7.4