From 0935875c40b33e80c17c3fcf489c3aa2aa5ef1d6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Sat, 25 Mar 2017 20:20:23 +0000 Subject: [PATCH] Change the default attributes for llvm.prefetch to inaccessiblemem_or_argmemonly so that we can perform some optimizations across it. Fixes PR32365 llvm-svn: 298781 --- llvm/include/llvm/IR/Intrinsics.td | 14 +++++----- llvm/test/Bitcode/compatibility-3.6.ll | 5 ++-- llvm/test/Bitcode/compatibility-3.7.ll | 5 ++-- llvm/test/Bitcode/compatibility-3.8.ll | 5 ++-- llvm/test/Bitcode/compatibility-3.9.ll | 9 +++--- llvm/test/Bitcode/compatibility-4.0.ll | 9 +++--- llvm/test/Bitcode/compatibility.ll | 9 +++--- llvm/test/Transforms/InstCombine/prefetch-load.ll | 34 +++++++++++++++++++++++ 8 files changed, 65 insertions(+), 25 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/prefetch-load.ll diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index c0305a0..c1a36f1 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -331,13 +331,13 @@ def int_get_dynamic_area_offset : Intrinsic<[llvm_anyint_ty]>; def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, GCCBuiltin<"__builtin_thread_pointer">; -// IntrArgMemOnly is more pessimistic than strictly necessary for prefetch, -// however it does conveniently prevent the prefetch from being reordered -// with respect to nearby accesses to the same memory. -def int_prefetch : Intrinsic<[], - [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>]>; +// IntrInaccessibleMemOrArgMemOnly is a little more pessimistic than strictly +// necessary for prefetch, however it does conveniently prevent the prefetch +// from being reordered overly much with respect to nearby access to the same +// memory while not impeding optimization. +def int_prefetch + : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0> ]>; def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll index 87958fc..8d51ee1 100644 --- a/llvm/test/Bitcode/compatibility-3.6.ll +++ b/llvm/test/Bitcode/compatibility-3.6.ll @@ -981,7 +981,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #33 + ; CHECK: call void @f.nobuiltin() #34 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #11 @@ -1183,7 +1183,8 @@ define void @intrinsics.codegen() { ; CHECK: attributes #30 = { argmemonly nounwind readonly } ; CHECK: attributes #31 = { argmemonly nounwind } ; CHECK: attributes #32 = { nounwind readonly } -; CHECK: attributes #33 = { builtin } +; CHECK: attributes #33 = { inaccessiblemem_or_argmemonly nounwind } +; CHECK: attributes #34 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll index 4ae0aed..ebdf4c3 100644 --- a/llvm/test/Bitcode/compatibility-3.7.ll +++ b/llvm/test/Bitcode/compatibility-3.7.ll @@ -1022,7 +1022,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #36 + ; CHECK: call void @f.nobuiltin() #37 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1246,7 +1246,8 @@ define void @misc.metadata() { ; CHECK: attributes #33 = { argmemonly nounwind readonly } ; CHECK: attributes #34 = { argmemonly nounwind } ; CHECK: attributes #35 = { nounwind readonly } -; CHECK: attributes #36 = { builtin } +; CHECK: attributes #36 = { inaccessiblemem_or_argmemonly nounwind } +; CHECK: attributes #37 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll index 79c1ecf..57ea3e0 100644 --- a/llvm/test/Bitcode/compatibility-3.8.ll +++ b/llvm/test/Bitcode/compatibility-3.8.ll @@ -1170,7 +1170,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #39 + ; CHECK: call void @f.nobuiltin() #40 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1556,7 +1556,8 @@ normal: ; CHECK: attributes #36 = { argmemonly nounwind readonly } ; CHECK: attributes #37 = { argmemonly nounwind } ; CHECK: attributes #38 = { nounwind readonly } -; CHECK: attributes #39 = { builtin } +; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind } +; CHECK: attributes #40 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll index 300be33..2a6cfe1 100644 --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -1241,7 +1241,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #40 + ; CHECK: call void @f.nobuiltin() #41 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1588,7 +1588,7 @@ normal: } declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #39 +; CHECK: declare void @f.writeonly() #40 ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } @@ -1629,8 +1629,9 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #36 = { argmemonly nounwind readonly } ; CHECK: attributes #37 = { argmemonly nounwind } ; CHECK: attributes #38 = { nounwind readonly } -; CHECK: attributes #39 = { writeonly } -; CHECK: attributes #40 = { builtin } +; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind } +; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #41 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll index fa7a9b1..c83c107 100644 --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -1241,7 +1241,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #40 + ; CHECK: call void @f.nobuiltin() #41 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1606,7 +1606,7 @@ normal: declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #39 +; CHECK: declare void @f.writeonly() #40 ;; Constant Expressions @@ -1654,8 +1654,9 @@ define i8** @constexpr() { ; CHECK: attributes #36 = { argmemonly nounwind readonly } ; CHECK: attributes #37 = { argmemonly nounwind } ; CHECK: attributes #38 = { nounwind readonly } -; CHECK: attributes #39 = { writeonly } -; CHECK: attributes #40 = { builtin } +; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind } +; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #41 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index e2b13f4..f1a883e 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1244,7 +1244,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #40 + ; CHECK: call void @f.nobuiltin() #41 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1609,7 +1609,7 @@ normal: declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #39 +; CHECK: declare void @f.writeonly() #40 ;; Constant Expressions @@ -1657,8 +1657,9 @@ define i8** @constexpr() { ; CHECK: attributes #36 = { argmemonly nounwind readonly } ; CHECK: attributes #37 = { argmemonly nounwind } ; CHECK: attributes #38 = { nounwind readonly } -; CHECK: attributes #39 = { writeonly } -; CHECK: attributes #40 = { builtin } +; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind } +; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #41 = { builtin } ;; Metadata diff --git a/llvm/test/Transforms/InstCombine/prefetch-load.ll b/llvm/test/Transforms/InstCombine/prefetch-load.ll new file mode 100644 index 0000000..f98b7ae --- /dev/null +++ b/llvm/test/Transforms/InstCombine/prefetch-load.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +%struct.C = type { %struct.C*, i32 } + +; Check that we instcombine the load across the prefetch. + +; CHECK-LABEL: define signext i32 @foo +define signext i32 @foo(%struct.C* %c) local_unnamed_addr #0 { +; CHECK: store i32 %dec, i32* %length_ +; CHECK-NOT: load +; CHECK: llvm.prefetch +; CHECK-NEXT: ret +entry: + %next_ = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 0 + %0 = load %struct.C*, %struct.C** %next_, align 8 + %next_1 = getelementptr inbounds %struct.C, %struct.C* %0, i32 0, i32 0 + %1 = load %struct.C*, %struct.C** %next_1, align 8 + store %struct.C* %1, %struct.C** %next_, align 8 + %length_ = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 1 + %2 = load i32, i32* %length_, align 8 + %dec = add nsw i32 %2, -1 + store i32 %dec, i32* %length_, align 8 + %3 = bitcast %struct.C* %1 to i8* + call void @llvm.prefetch(i8* %3, i32 0, i32 0, i32 1) + %4 = load i32, i32* %length_, align 8 + ret i32 %4 +} + +; Function Attrs: inaccessiblemem_or_argmemonly nounwind +declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32) + +attributes #0 = { noinline nounwind } +; We've explicitly removed the function attrs from llvm.prefetch so we get the defaults. +; attributes #1 = { inaccessiblemem_or_argmemonly nounwind } -- 2.7.4