Add nocapture to pointer parameters of masked stores/loads

author Benjamin Maxwell <benjamin.maxwell@arm.com>

Mon, 24 Oct 2022 09:41:19 +0000 (09:41 +0000)

committer Matt Devereau <matthew.devereau@arm.com>

Mon, 24 Oct 2022 11:15:55 +0000 (11:15 +0000)
author Benjamin Maxwell <benjamin.maxwell@arm.com>
Mon, 24 Oct 2022 09:41:19 +0000 (09:41 +0000)
committer Matt Devereau <matthew.devereau@arm.com>
Mon, 24 Oct 2022 11:15:55 +0000 (11:15 +0000)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td

index 2d0b8bc..025dd80 100644 (file)
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -669,7 +669,7 @@ def int_memset_inline
      : Intrinsic<[],
        [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty],
        [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
-       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>, 
+       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
         ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
  
  // FIXME: Add version of these floating point intrinsics which allow non-default
@@ -1799,14 +1799,15 @@ def int_masked_load:
    DefaultAttrsIntrinsic<[llvm_anyvector_ty],
              [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
-            [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+            [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<1>>,
+             NoCapture<ArgIndex<0>>]>;
  
  def int_masked_store:
    DefaultAttrsIntrinsic<[],
              [llvm_anyvector_ty, LLVMAnyPointerType<LLVMMatchType<0>>,
               llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
              [IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
-             ImmArg<ArgIndex<2>>]>;
+             ImmArg<ArgIndex<2>>, NoCapture<ArgIndex<1>>]>;
  
  def int_masked_gather:
    DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1824,13 +1825,14 @@ def int_masked_expandload:
    DefaultAttrsIntrinsic<[llvm_anyvector_ty],
              [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
               LLVMMatchType<0>],
-            [IntrReadMem, IntrWillReturn]>;
+            [IntrReadMem, IntrWillReturn, NoCapture<ArgIndex<0>>]>;
  
  def int_masked_compressstore:
    DefaultAttrsIntrinsic<[],
              [llvm_anyvector_ty, LLVMPointerToElt<0>,
               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
-            [IntrWriteMem, IntrArgMemOnly, IntrWillReturn]>;
+            [IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
+             NoCapture<ArgIndex<1>>]>;
  
  // Test whether a pointer is associated with a type metadata identifier.
  def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
diff --git a/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll

new file mode 100644 (file)

index 0000000..afa90ec
--- /dev/null
+++ b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Make sure some masked load/store intrinsics have the expected attributes.
+; Specifically, `nocapture` should be added to the pointer parameters for the loads/stores.
+
+; CHECK: declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nocapture, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i64>) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]]
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+; CHECK: declare void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64>, ptr nocapture, i32 immarg, <vscale x 2 x i1>) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]]
+declare void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64>, ptr, i32, <vscale x 2 x i1>)
+
+; CHECK: declare <16 x float> @llvm.masked.expandload.v16f32(ptr nocapture, <16 x i1>, <16 x float>) [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]]
+declare <16 x float> @llvm.masked.expandload.v16f32 (ptr, <16 x i1>, <16 x float>)
+
+; CHECK: declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr nocapture, <8 x i1>) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]]
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8  x i1>)
+
+; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nocallback nofree nosync nounwind readonly willreturn }
+; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY]] = { argmemonly nocallback nofree nosync nounwind willreturn writeonly }
+; CHECK: attributes [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind readonly willreturn }
diff --git a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll

index 78506f1..58a9c81 100644 (file)
--- a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
@@ -1,16 +1,14 @@
  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -opaque-pointers -passes=instcombine < %s | FileCheck %s
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
  
  @contant_int_array = private unnamed_addr constant [10 x i64] [i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9]
  
  ; InstCombine should be able to optimize out the alloca and memcpy:
  define void @combine_masked_load_store_from_constant_array(ptr %ptr) {
  ; CHECK-LABEL: @combine_masked_load_store_from_constant_array(
-; CHECK-NEXT:    [[TMP1:%.*]] = alloca [10 x i64], align 8
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(80) [[TMP1]], ptr noundef nonnull align 16 dereferenceable(80) @contant_int_array, i64 80, i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
-; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull [[TMP1]], i32 8, <vscale x 2 x i1> [[TMP2]], <vscale x 2 x i64> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], ptr [[PTR:%.*]], i32 1, <vscale x 2 x i1> [[TMP2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 8, <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP2]], ptr [[PTR:%.*]], i32 1, <vscale x 2 x i1> [[TMP1]])
  ; CHECK-NEXT:    ret void
  ;
    %1 = alloca [10 x i64]
@@ -21,7 +19,22 @@ define void @combine_masked_load_store_from_constant_array(ptr %ptr) {
    ret void
  }
  
+define void @combine_masked_expandload_compressstore_from_constant_array(ptr %ptr) {
+; CHECK-LABEL: @combine_masked_expandload_compressstore_from_constant_array(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <10 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [10 x i64]
+  call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr @contant_int_array, i64 80, i1 false)
+  %2 = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull %1, <10 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <10 x i64> zeroinitializer)
+  call void @llvm.masked.compressstore.nxv10i64.p0(<10 x i64> %2, ptr %ptr, <10 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>)
+  ret void
+}
+
  declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
  declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
  declare void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64>, ptr, i32, <vscale x 2 x i1>)
  declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32, i32)
+declare <10 x i64> @llvm.masked.expandload.v10i64(ptr, <10 x i1>,  <10 x i64>)
+declare void @llvm.masked.compressstore.nxv10i64.p0(<10 x i64>, ptr, <10 x i1>)
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir

index 78fc159..e6ff83f 100644 (file)
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -752,12 +752,12 @@ llvm.func @lifetime(%p: !llvm.ptr) {
  // CHECK-DAG: declare <48 x float> @llvm.matrix.column.major.load.v48f32.i64(ptr nocapture, i64, i1 immarg, i32 immarg, i32 immarg)
  // CHECK-DAG: declare void @llvm.matrix.column.major.store.v48f32.i64(<48 x float>, ptr nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg)
  // CHECK-DAG: declare <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64, i64)
-// CHECK-DAG: declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32 immarg, <7 x i1>, <7 x float>)
-// CHECK-DAG: declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32 immarg, <7 x i1>)
+// CHECK-DAG: declare <7 x float> @llvm.masked.load.v7f32.p0(ptr nocapture, i32 immarg, <7 x i1>, <7 x float>)
+// CHECK-DAG: declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr nocapture, i32 immarg, <7 x i1>)
  // CHECK-DAG: declare <7 x float> @llvm.masked.gather.v7f32.v7p0(<7 x ptr>, i32 immarg, <7 x i1>, <7 x float>)
  // CHECK-DAG: declare void @llvm.masked.scatter.v7f32.v7p0(<7 x float>, <7 x ptr>, i32 immarg, <7 x i1>)
-// CHECK-DAG: declare <7 x float> @llvm.masked.expandload.v7f32(ptr, <7 x i1>, <7 x float>)
-// CHECK-DAG: declare void @llvm.masked.compressstore.v7f32(<7 x float>, ptr, <7 x i1>)
+// CHECK-DAG: declare <7 x float> @llvm.masked.expandload.v7f32(ptr nocapture, <7 x i1>, <7 x float>)
+// CHECK-DAG: declare void @llvm.masked.compressstore.v7f32(<7 x float>, ptr nocapture, <7 x i1>)
  // CHECK-DAG: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
  // CHECK-DAG: declare void @llvm.memcpy.inline.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64 immarg, i1 immarg)
  // CHECK-DAG: declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
author	Benjamin Maxwell <benjamin.maxwell@arm.com>
	Mon, 24 Oct 2022 09:41:19 +0000 (09:41 +0000)
committer	Matt Devereau <matthew.devereau@arm.com>
	Mon, 24 Oct 2022 11:15:55 +0000 (11:15 +0000)
llvm/include/llvm/IR/Intrinsics.td		patch \| blob \| history
llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll		patch \| blob \| history
mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir		patch \| blob \| history