From 0324706764b9d0a1a6a6c1af13fc7cfb01500d80 Mon Sep 17 00:00:00 2001
From: "Kristian H. Kristensen"
Date: Thu, 10 Oct 2019 15:15:37 -0700
Subject: [PATCH] freedreno/ir3: Add intrinsics that map to LDLW/STLW

These intrinsics will let us do all the offset calculations in nir,
which is nicer to work with and lets nir_opt_algebraic eat it all up.

Signed-off-by: Kristian H. Kristensen
---
 src/compiler/nir/nir_intrinsics.py   |  8 ++++
 src/freedreno/ir3/ir3_compiler_nir.c | 75 ++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 40430b3..ae62a85 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -771,6 +771,14 @@ intrinsic("ssbo_atomic_xor_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
 intrinsic("ssbo_atomic_exchange_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
 intrinsic("ssbo_atomic_comp_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1)
 
+# IR3-specific load/store intrinsics. These access a buffer used to pass data
+# between geometry stages - perhaps it's explicit access to the vertex cache.
+
+# src[] = { value, offset }.
+store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
+# src[] = { offset }.
+load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
+
 # Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
 # within a blend shader to read/write the raw value from the tile buffer,
 # without applying any format conversion in the process. If the shader needs
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 118e778..2cc7206 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -843,6 +843,75 @@ emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	}
 }
 
+/* Emit one LDLW for load_shared_ir3. src[] = { offset }. const_index[] = { base } */
+static void
+emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+		struct ir3_instruction **dst)
+{
+	struct ir3_block *b = ctx->block;
+	struct ir3_instruction *load, *offset;
+	unsigned base;
+
+	offset = ir3_get_src(ctx, &intr->src[0])[0];
+	base = nir_intrinsic_base(intr);
+
+	load = ir3_LDLW(b, offset, 0,
+			create_immed(b, intr->num_components), 0,
+			create_immed(b, base), 0);
+
+	load->cat6.type = utype_dst(intr->dest);
+	load->regs[0]->wrmask = MASK(intr->num_components);
+
+	load->barrier_class = IR3_BARRIER_SHARED_R;
+	load->barrier_conflict = IR3_BARRIER_SHARED_W;
+
+	ir3_split_dest(b, dst, load, 0, intr->num_components); /* one dst[] entry per loaded component */
+}
+
+/* Emit STLW(s) for store_shared_ir3. src[] = { value, offset }. const_index[] = { base, write_mask } */
+static void
+emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+{
+	struct ir3_block *b = ctx->block;
+	struct ir3_instruction *store, *offset;
+	struct ir3_instruction * const *value;
+	unsigned base, wrmask;
+
+	value = ir3_get_src(ctx, &intr->src[0]);
+	offset = ir3_get_src(ctx, &intr->src[1])[0];
+
+	base = nir_intrinsic_base(intr);
+	wrmask = nir_intrinsic_write_mask(intr);
+
+	/* Emit one STLW per run of consecutive enabled channels. ffs() on
+	 * the writemask finds the first enabled channel; ffs() on the
+	 * bit-inverse of the down-shifted writemask gives the length of
+	 * that run of enabled bits.
+	 *
+	 * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
+	 */
+	while (wrmask) {
+		unsigned first_component = ffs(wrmask) - 1;
+		unsigned length = ffs(~(wrmask >> first_component)) - 1;
+
+		store = ir3_STLW(b, offset, 0,
+			ir3_create_collect(ctx, &value[first_component], length), 0,
+			create_immed(b, length), 0);
+
+		store->cat6.dst_offset = first_component + base; /* first channel of this run, plus the intrinsic's base */
+		store->cat6.type = utype_src(intr->src[0]);
+		store->barrier_class = IR3_BARRIER_SHARED_W;
+		store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
+
+		array_insert(b, b->keeps, store); /* NOTE(review): presumably stops the store being removed as dead code - confirm */
+
+		/* Keep only the channels above the run just written, then loop
+		 * for the rest. NOTE(review): 15 assumes a <= 4-bit writemask.
+		 */
+		wrmask &= (15 << (first_component + length));
+	}
+}
+
 /*
  * CS shared variable atomic intrinsics
  *
@@ -1582,6 +1651,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		break;
 	}
 
+	case nir_intrinsic_load_shared_ir3:
+		emit_intrinsic_load_shared_ir3(ctx, intr, dst);
+		break;
+	case nir_intrinsic_store_shared_ir3:
+		emit_intrinsic_store_shared_ir3(ctx, intr);
+		break;
 	default:
 		ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
 				nir_intrinsic_infos[intr->intrinsic].name);
-- 
2.7.4