gallivm: Use unified atomics
author     Alyssa Rosenzweig <alyssa@rosenzweig.io>
Mon, 8 May 2023 23:38:16 +0000 (19:38 -0400)
committer  Marge Bot <emma+marge@anholt.net>
Fri, 12 May 2023 20:39:46 +0000 (20:39 +0000)
This is a huge win because gallivm duplicated the NIR-atomic-to-LLVM
translations in a zillion places; they now all go through a single
lp_translate_atomic_op() helper.
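
As a minimal illustrative sketch (not part of this patch), this is the
pattern the unified intrinsics enable: a consumer reads the operation off
the instruction with nir_intrinsic_atomic_op() instead of switching on one
intrinsic opcode per operation and memory space. example_translate() is a
hypothetical helper; the other identifiers are from NIR and from this patch.

    /*
     * Illustrative only: works for any of the unified RMW atomic
     * intrinsics produced by nir_lower_legacy_atomics().
     */
    static LLVMAtomicRMWBinOp
    example_translate(nir_intrinsic_instr *instr)
    {
       assert(instr->intrinsic == nir_intrinsic_ssbo_atomic ||
              instr->intrinsic == nir_intrinsic_shared_atomic ||
              instr->intrinsic == nir_intrinsic_global_atomic);

       /* The operation (iadd, ixor, umin, ...) is carried as an index
        * on the instruction rather than baked into the opcode. */
       nir_atomic_op op = nir_intrinsic_atomic_op(instr);

       /* One shared translation to LLVM's atomicrmw opcodes. */
       return lp_translate_atomic_op(op);
    }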

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22914>

src/gallium/auxiliary/gallivm/lp_bld_nir.c
src/gallium/auxiliary/gallivm/lp_bld_nir.h
src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 45a50b9..75f2d44 100644
@@ -1603,10 +1603,10 @@ visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
    LLVMValueRef val = get_src(bld_base, instr->src[2]);
    LLVMValueRef val2 = NULL;
    int bitsize = nir_src_bit_size(instr->src[2]);
-   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
+   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
       val2 = get_src(bld_base, instr->src[3]);
 
-   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx,
+   bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, idx,
                         offset, val, val2, &result[0]);
 }
 
@@ -1681,6 +1681,27 @@ visit_store_image(struct lp_build_nir_context *bld_base,
    bld_base->image_op(bld_base, &params);
 }
 
+LLVMAtomicRMWBinOp
+lp_translate_atomic_op(nir_atomic_op op)
+{
+   switch (op) {
+   case nir_atomic_op_iadd: return LLVMAtomicRMWBinOpAdd;
+   case nir_atomic_op_xchg: return LLVMAtomicRMWBinOpXchg;
+   case nir_atomic_op_iand: return LLVMAtomicRMWBinOpAnd;
+   case nir_atomic_op_ior:  return LLVMAtomicRMWBinOpOr;
+   case nir_atomic_op_ixor: return LLVMAtomicRMWBinOpXor;
+   case nir_atomic_op_umin: return LLVMAtomicRMWBinOpUMin;
+   case nir_atomic_op_umax: return LLVMAtomicRMWBinOpUMax;
+   case nir_atomic_op_imin: return LLVMAtomicRMWBinOpMin;
+   case nir_atomic_op_imax: return LLVMAtomicRMWBinOpMax;
+   case nir_atomic_op_fadd: return LLVMAtomicRMWBinOpFAdd;
+#if LLVM_VERSION_MAJOR >= 15
+   case nir_atomic_op_fmin: return LLVMAtomicRMWBinOpFMin;
+   case nir_atomic_op_fmax: return LLVMAtomicRMWBinOpFMax;
+#endif
+   default:          unreachable("Unexpected atomic");
+   }
+}
 
 static void
 visit_atomic_image(struct lp_build_nir_context *bld_base,
@@ -1696,48 +1717,8 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
 
    memset(&params, 0, sizeof(params));
 
-   switch (instr->intrinsic) {
-   case nir_intrinsic_image_atomic_add:
-      params.op = LLVMAtomicRMWBinOpAdd;
-      break;
-   case nir_intrinsic_image_atomic_exchange:
-      params.op = LLVMAtomicRMWBinOpXchg;
-      break;
-   case nir_intrinsic_image_atomic_and:
-      params.op = LLVMAtomicRMWBinOpAnd;
-      break;
-   case nir_intrinsic_image_atomic_or:
-      params.op = LLVMAtomicRMWBinOpOr;
-      break;
-   case nir_intrinsic_image_atomic_xor:
-      params.op = LLVMAtomicRMWBinOpXor;
-      break;
-   case nir_intrinsic_image_atomic_umin:
-      params.op = LLVMAtomicRMWBinOpUMin;
-      break;
-   case nir_intrinsic_image_atomic_umax:
-      params.op = LLVMAtomicRMWBinOpUMax;
-      break;
-   case nir_intrinsic_image_atomic_imin:
-      params.op = LLVMAtomicRMWBinOpMin;
-      break;
-   case nir_intrinsic_image_atomic_imax:
-      params.op = LLVMAtomicRMWBinOpMax;
-      break;
-   case nir_intrinsic_image_atomic_fadd:
-      params.op = LLVMAtomicRMWBinOpFAdd;
-      break;
-#if LLVM_VERSION_MAJOR >= 15
-   case nir_intrinsic_image_atomic_fmin:
-      params.op = LLVMAtomicRMWBinOpFMin;
-      break;
-   case nir_intrinsic_image_atomic_fmax:
-      params.op = LLVMAtomicRMWBinOpFMax;
-      break;
-#endif
-   default:
-      break;
-   }
+   if (instr->intrinsic != nir_intrinsic_image_atomic_swap)
+      params.op = lp_translate_atomic_op(nir_intrinsic_atomic_op(instr));
 
    params.target = glsl_sampler_to_pipe(nir_intrinsic_image_dim(instr),
                                         nir_intrinsic_image_array(instr));
@@ -1752,7 +1733,7 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
 
    if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS)
       params.ms_index = get_src(bld_base, instr->src[2]);
-   if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
+   if (instr->intrinsic == nir_intrinsic_image_atomic_swap) {
       LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
       params.indata[0] = in_val;
       params.indata2[0] = cas_val;
@@ -1762,7 +1743,7 @@ visit_atomic_image(struct lp_build_nir_context *bld_base,
 
    params.outdata = result;
    params.img_op =
-      (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
+      (instr->intrinsic == nir_intrinsic_image_atomic_swap)
       ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
    if (nir_src_is_const(instr->src[0]))
       params.image_index = nir_src_as_int(instr->src[0]);
@@ -1849,10 +1830,10 @@ visit_shared_atomic(struct lp_build_nir_context *bld_base,
    LLVMValueRef val = get_src(bld_base, instr->src[1]);
    LLVMValueRef val2 = NULL;
    int bitsize = nir_src_bit_size(instr->src[1]);
-   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
+   if (instr->intrinsic == nir_intrinsic_shared_atomic_swap)
       val2 = get_src(bld_base, instr->src[2]);
 
-   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL,
+   bld_base->atomic_mem(bld_base, nir_intrinsic_atomic_op(instr), bitsize, NULL,
                         offset, val, val2, &result[0]);
 }
 
@@ -1931,11 +1912,12 @@ visit_global_atomic(struct lp_build_nir_context *bld_base,
    LLVMValueRef val2 = NULL;
    int addr_bitsize = nir_src_bit_size(instr->src[0]);
    int val_bitsize = nir_src_bit_size(instr->src[1]);
-   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
+   if (instr->intrinsic == nir_intrinsic_global_atomic_swap)
       val2 = get_src(bld_base, instr->src[2]);
 
-   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
-                           val_bitsize, addr, val, val2, &result[0]);
+   bld_base->atomic_global(bld_base, nir_intrinsic_atomic_op(instr),
+                           addr_bitsize, val_bitsize, addr, val, val2,
+                           &result[0]);
 }
 
 #if LLVM_VERSION_MAJOR >= 10
@@ -2086,19 +2068,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_end_primitive:
       bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
       break;
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_fmin:
-   case nir_intrinsic_ssbo_atomic_fmax:
+   case nir_intrinsic_ssbo_atomic:
+   case nir_intrinsic_ssbo_atomic_swap:
       visit_ssbo_atomic(bld_base, instr, result);
       break;
    case nir_intrinsic_image_load:
@@ -2107,19 +2078,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_image_store:
       visit_store_image(bld_base, instr);
       break;
-   case nir_intrinsic_image_atomic_add:
-   case nir_intrinsic_image_atomic_imin:
-   case nir_intrinsic_image_atomic_imax:
-   case nir_intrinsic_image_atomic_umin:
-   case nir_intrinsic_image_atomic_umax:
-   case nir_intrinsic_image_atomic_and:
-   case nir_intrinsic_image_atomic_or:
-   case nir_intrinsic_image_atomic_xor:
-   case nir_intrinsic_image_atomic_exchange:
-   case nir_intrinsic_image_atomic_comp_swap:
-   case nir_intrinsic_image_atomic_fadd:
-   case nir_intrinsic_image_atomic_fmin:
-   case nir_intrinsic_image_atomic_fmax:
+   case nir_intrinsic_image_atomic:
+   case nir_intrinsic_image_atomic_swap:
       visit_atomic_image(bld_base, instr, result);
       break;
    case nir_intrinsic_image_size:
@@ -2134,19 +2094,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_store_shared:
       visit_shared_store(bld_base, instr);
       break;
-   case nir_intrinsic_shared_atomic_add:
-   case nir_intrinsic_shared_atomic_imin:
-   case nir_intrinsic_shared_atomic_umin:
-   case nir_intrinsic_shared_atomic_imax:
-   case nir_intrinsic_shared_atomic_umax:
-   case nir_intrinsic_shared_atomic_and:
-   case nir_intrinsic_shared_atomic_or:
-   case nir_intrinsic_shared_atomic_xor:
-   case nir_intrinsic_shared_atomic_exchange:
-   case nir_intrinsic_shared_atomic_comp_swap:
-   case nir_intrinsic_shared_atomic_fadd:
-   case nir_intrinsic_shared_atomic_fmin:
-   case nir_intrinsic_shared_atomic_fmax:
+   case nir_intrinsic_shared_atomic:
+   case nir_intrinsic_shared_atomic_swap:
       visit_shared_atomic(bld_base, instr, result);
       break;
    case nir_intrinsic_scoped_barrier:
@@ -2163,19 +2112,8 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_store_global:
       visit_store_global(bld_base, instr);
       break;
-   case nir_intrinsic_global_atomic_add:
-   case nir_intrinsic_global_atomic_imin:
-   case nir_intrinsic_global_atomic_umin:
-   case nir_intrinsic_global_atomic_imax:
-   case nir_intrinsic_global_atomic_umax:
-   case nir_intrinsic_global_atomic_and:
-   case nir_intrinsic_global_atomic_or:
-   case nir_intrinsic_global_atomic_xor:
-   case nir_intrinsic_global_atomic_exchange:
-   case nir_intrinsic_global_atomic_comp_swap:
-   case nir_intrinsic_global_atomic_fadd:
-   case nir_intrinsic_global_atomic_fmin:
-   case nir_intrinsic_global_atomic_fmax:
+   case nir_intrinsic_global_atomic:
+   case nir_intrinsic_global_atomic_swap:
       visit_global_atomic(bld_base, instr, result);
       break;
    case nir_intrinsic_vote_all:
@@ -2730,6 +2668,7 @@ bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
    nir_lower_locals_to_regs(nir);
    nir_remove_dead_derefs(nir);
    nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
+   nir_lower_legacy_atomics(nir);
 
    if (is_aos(bld_base)) {
       nir_move_vec_src_uses_to_dest(nir);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
index d623b81..2921432 100644
@@ -101,7 +101,7 @@ struct lp_build_nir_context
                         LLVMValueRef addr, LLVMValueRef dst);
 
    void (*atomic_global)(struct lp_build_nir_context *bld_base,
-                         nir_intrinsic_op op,
+                         nir_atomic_op nir_op,
                          unsigned addr_bit_size,
                          unsigned val_bit_size,
                          LLVMValueRef addr,
@@ -119,7 +119,7 @@ struct lp_build_nir_context
                      LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst);
 
    void (*atomic_mem)(struct lp_build_nir_context *bld_base,
-                      nir_intrinsic_op op,
+                      nir_atomic_op op,
                       unsigned bit_size,
                       LLVMValueRef index, LLVMValueRef offset,
                       LLVMValueRef val, LLVMValueRef val2,
@@ -359,5 +359,7 @@ get_int_bld(struct lp_build_nir_context *bld_base,
 unsigned
 lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base, unsigned chan);
 
+LLVMAtomicRMWBinOp
+lp_translate_atomic_op(nir_atomic_op op);
 
 #endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
index e8bd7d0..01e5c32 100644
@@ -991,27 +991,8 @@ static void emit_store_global(struct lp_build_nir_context *bld_base,
    }
 }
 
-static bool atomic_op_is_float(nir_intrinsic_op nir_op)
-{
-   switch (nir_op) {
-   case nir_intrinsic_shared_atomic_fadd:
-   case nir_intrinsic_shared_atomic_fmin:
-   case nir_intrinsic_shared_atomic_fmax:
-   case nir_intrinsic_global_atomic_fadd:
-   case nir_intrinsic_global_atomic_fmin:
-   case nir_intrinsic_global_atomic_fmax:
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_fmin:
-   case nir_intrinsic_ssbo_atomic_fmax:
-      return true;
-   default:
-      break;
-   }
-   return false;
-}
-
 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
-                               nir_intrinsic_op nir_op,
+                               nir_atomic_op nir_op,
                                unsigned addr_bit_size,
                                unsigned val_bit_size,
                                LLVMValueRef addr,
@@ -1021,7 +1002,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
-   bool is_flt = atomic_op_is_float(nir_op);
+   bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
    struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
    if (is_flt)
       val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
@@ -1046,7 +1027,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
    lp_build_if(&ifthen, gallivm, cond);
 
    addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
-   if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
+   if (val2 != NULL /* compare-and-swap */) {
       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                          loop_state.counter, "");
       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
@@ -1057,52 +1038,7 @@ static void emit_atomic_global(struct lp_build_nir_context *bld_base,
                                       false);
       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
    } else {
-      LLVMAtomicRMWBinOp op;
-      switch (nir_op) {
-      case nir_intrinsic_global_atomic_add:
-         op = LLVMAtomicRMWBinOpAdd;
-         break;
-      case nir_intrinsic_global_atomic_exchange:
-
-         op = LLVMAtomicRMWBinOpXchg;
-         break;
-      case nir_intrinsic_global_atomic_and:
-         op = LLVMAtomicRMWBinOpAnd;
-         break;
-      case nir_intrinsic_global_atomic_or:
-         op = LLVMAtomicRMWBinOpOr;
-         break;
-      case nir_intrinsic_global_atomic_xor:
-         op = LLVMAtomicRMWBinOpXor;
-         break;
-      case nir_intrinsic_global_atomic_umin:
-         op = LLVMAtomicRMWBinOpUMin;
-         break;
-      case nir_intrinsic_global_atomic_umax:
-         op = LLVMAtomicRMWBinOpUMax;
-         break;
-      case nir_intrinsic_global_atomic_imin:
-         op = LLVMAtomicRMWBinOpMin;
-         break;
-      case nir_intrinsic_global_atomic_imax:
-         op = LLVMAtomicRMWBinOpMax;
-         break;
-      case nir_intrinsic_global_atomic_fadd:
-         op = LLVMAtomicRMWBinOpFAdd;
-         break;
-#if LLVM_VERSION_MAJOR >= 15
-      case nir_intrinsic_global_atomic_fmin:
-         op = LLVMAtomicRMWBinOpFMin;
-         break;
-      case nir_intrinsic_global_atomic_fmax:
-         op = LLVMAtomicRMWBinOpFMax;
-         break;
-#endif
-      default:
-         unreachable("unknown atomic op");
-      }
-
-      scalar = LLVMBuildAtomicRMW(builder, op,
+      scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                   addr_ptr, value_ptr,
                                   LLVMAtomicOrderingSequentiallyConsistent,
                                   false);
@@ -1506,7 +1442,7 @@ static void emit_store_mem(struct lp_build_nir_context *bld_base,
 
 
 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
-                            nir_intrinsic_op nir_op,
+                            nir_atomic_op nir_op,
                             uint32_t bit_size,
                             LLVMValueRef index, LLVMValueRef offset,
                             LLVMValueRef val, LLVMValueRef val2,
@@ -1517,7 +1453,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    uint32_t shift_val = bit_size_to_shift_size(bit_size);
-   bool is_float = atomic_op_is_float(nir_op);
+   bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
    struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);
 
    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
@@ -1557,7 +1493,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
    inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
    lp_build_if(&ifthen, gallivm, inner_cond);
 
-   if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
+   if (val2 != NULL) {
       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                          loop_state.counter, "");
       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
@@ -1568,63 +1504,7 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                                       false);
       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
    } else {
-      LLVMAtomicRMWBinOp op;
-
-      switch (nir_op) {
-      case nir_intrinsic_shared_atomic_add:
-      case nir_intrinsic_ssbo_atomic_add:
-         op = LLVMAtomicRMWBinOpAdd;
-         break;
-      case nir_intrinsic_shared_atomic_exchange:
-      case nir_intrinsic_ssbo_atomic_exchange:
-         op = LLVMAtomicRMWBinOpXchg;
-         break;
-      case nir_intrinsic_shared_atomic_and:
-      case nir_intrinsic_ssbo_atomic_and:
-         op = LLVMAtomicRMWBinOpAnd;
-         break;
-      case nir_intrinsic_shared_atomic_or:
-      case nir_intrinsic_ssbo_atomic_or:
-         op = LLVMAtomicRMWBinOpOr;
-         break;
-      case nir_intrinsic_shared_atomic_xor:
-      case nir_intrinsic_ssbo_atomic_xor:
-         op = LLVMAtomicRMWBinOpXor;
-         break;
-      case nir_intrinsic_shared_atomic_umin:
-      case nir_intrinsic_ssbo_atomic_umin:
-         op = LLVMAtomicRMWBinOpUMin;
-         break;
-      case nir_intrinsic_shared_atomic_umax:
-      case nir_intrinsic_ssbo_atomic_umax:
-         op = LLVMAtomicRMWBinOpUMax;
-         break;
-      case nir_intrinsic_ssbo_atomic_imin:
-      case nir_intrinsic_shared_atomic_imin:
-         op = LLVMAtomicRMWBinOpMin;
-         break;
-      case nir_intrinsic_ssbo_atomic_imax:
-      case nir_intrinsic_shared_atomic_imax:
-         op = LLVMAtomicRMWBinOpMax;
-         break;
-      case nir_intrinsic_shared_atomic_fadd:
-      case nir_intrinsic_ssbo_atomic_fadd:
-         op = LLVMAtomicRMWBinOpFAdd;
-         break;
-#if LLVM_VERSION_MAJOR >= 15
-      case nir_intrinsic_shared_atomic_fmin:
-      case nir_intrinsic_ssbo_atomic_fmin:
-         op = LLVMAtomicRMWBinOpFMin;
-         break;
-      case nir_intrinsic_shared_atomic_fmax:
-      case nir_intrinsic_ssbo_atomic_fmax:
-         op = LLVMAtomicRMWBinOpFMax;
-         break;
-#endif
-      default:
-         unreachable("unknown atomic op");
-      }
-      scalar = LLVMBuildAtomicRMW(builder, op,
+      scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                   scalar_ptr, value_ptr,
                                   LLVMAtomicOrderingSequentiallyConsistent,
                                   false);