From 107cd5f5f63602796cd825858bf8dfcdf53fdce2 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Sat, 6 Feb 2016 21:23:39 +0000 Subject: [PATCH] IslNodeBuilder: Invariant load hoisting of elements with differing sizes Always use access-instruction pointer type to load the invariant values. Otherwise mismatches between ScopArrayInfo element type and memory access element type will result in invalid casts. Since r259784, these type mismatches are a lot more common and also arise with types of different size, which have not been handled before. Interestingly, this change actually simplifies the code, as we now have only one code path that is always taken, rather than a standard code path for the common case and a "fixup" code path that replaces the standard code path in case of mismatching types. llvm-svn: 260009 --- polly/lib/CodeGen/IslNodeBuilder.cpp | 25 +++------- .../invariant_load_different_sized_types.ll | 54 ++++++++++++++++++++++ .../Isl/CodeGen/multiple-types-invariant-load.ll | 2 +- ...iant_load_access_classes_different_base_type.ll | 6 +-- ..._access_classes_different_base_type_escaping.ll | 4 +- ...ess_classes_different_base_type_same_pointer.ll | 10 ++-- ...es_different_base_type_same_pointer_escaping.ll | 8 ++-- 7 files changed, 75 insertions(+), 34 deletions(-) create mode 100644 polly/test/Isl/CodeGen/invariant_load_different_sized_types.ll diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 21a4e6a..a2a5698 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -900,32 +900,21 @@ Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext()); isl_ast_expr *Access = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); - Value *PreloadVal = ExprBuilder.create(Access); - - if (LoadInst *PreloadInst = dyn_cast(PreloadVal)) - PreloadInst->setAlignment(dyn_cast(AccInst)->getAlignment()); + auto 
*Address = isl_ast_expr_address_of(Access); + auto *AddressValue = ExprBuilder.create(Address); + Value *PreloadVal; // Correct the type as the SAI might have a different type than the user // expects, especially if the base pointer is a struct. Type *Ty = AccInst->getType(); - if (Ty == PreloadVal->getType()) - return PreloadVal; - if (!Ty->isFloatingPointTy() && !PreloadVal->getType()->isFloatingPointTy()) - return PreloadVal = Builder.CreateBitOrPointerCast(PreloadVal, Ty); - - // We do not want to cast floating point to non-floating point types and vice - // versa, thus we simply create a new load with a casted pointer expression. - auto *LInst = dyn_cast(PreloadVal); - assert(LInst && "Preloaded value was not a load instruction"); - auto *Ptr = LInst->getPointerOperand(); - Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(), - Ptr->getName() + ".cast"); - PreloadVal = Builder.CreateLoad(Ptr, LInst->getName()); + auto *Ptr = AddressValue; + auto Name = Ptr->getName(); + Ptr = Builder.CreatePointerCast(Ptr, Ty->getPointerTo(), Name + ".cast"); + PreloadVal = Builder.CreateLoad(Ptr, Name + ".load"); if (LoadInst *PreloadInst = dyn_cast(PreloadVal)) PreloadInst->setAlignment(dyn_cast(AccInst)->getAlignment()); - LInst->eraseFromParent(); return PreloadVal; } diff --git a/polly/test/Isl/CodeGen/invariant_load_different_sized_types.ll b/polly/test/Isl/CodeGen/invariant_load_different_sized_types.ll new file mode 100644 index 0000000..32008bd --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load_different_sized_types.ll @@ -0,0 +1,54 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK: polly.preload.begin: ; preds = %polly.split_new_and_old +; CHECK-NEXT: %polly.access.cast.tmp2 = bitcast %struct.hoge* %tmp2 to i32* +; CHECK-NEXT: %polly.access.tmp2 = getelementptr i32, i32* %polly.access.cast.tmp2, i64 1 +; CHECK-NEXT: %polly.access.tmp2.load = load i32, i32* 
%polly.access.tmp2, align 1 +; CHECK-NEXT: store i32 %polly.access.tmp2.load, i32* %tmp.preload.s2a + + +%struct.hoge = type { [4 x i8], i32, i32, i32, i32, i32, [16 x i8], [16 x i8], i64, i64, i64, i64, i64 } + +; Function Attrs: nounwind uwtable +define void @widget() #0 { +bb: + %tmp2 = alloca %struct.hoge, align 1 + br label %bb3 + +bb3: ; preds = %bb + %tmp4 = getelementptr inbounds %struct.hoge, %struct.hoge* %tmp2, i64 0, i32 10 + %tmp5 = add nsw i32 undef, 1 + %tmp6 = getelementptr inbounds %struct.hoge, %struct.hoge* %tmp2, i64 0, i32 1 + %tmp = load i32, i32* %tmp6, align 1, !tbaa !1 + %tmp7 = icmp slt i32 %tmp, 3 + br i1 %tmp7, label %bb8, label %bb10 + +bb8: ; preds = %bb3 + %tmp9 = load i64, i64* %tmp4, align 1, !tbaa !7 + br label %bb10 + +bb10: ; preds = %bb8, %bb3 + %tmp11 = icmp eq i32 %tmp5, 0 + br i1 %tmp11, label %bb13, label %bb12 + +bb12: ; preds = %bb10 + unreachable + +bb13: ; preds = %bb10 + unreachable +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.9.0 (trunk 259751) (llvm/trunk 259771)"} +!1 = !{!2, !5, i64 4} +!2 = !{!"itsf_header_tag", !3, i64 0, !5, i64 4, !5, i64 8, !5, i64 12, !5, i64 16, !5, i64 20, !3, i64 24, !3, i64 40, !6, i64 56, !6, i64 64, !6, i64 72, !6, i64 80, !6, i64 88} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!"int", !3, i64 0} +!6 = !{!"long", !3, i64 0} +!7 = !{!2, !6, i64 72} diff --git a/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll b/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll index 759142f..52c2275 100644 --- a/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll +++ b/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll 
@@ -7,7 +7,7 @@ ; CHECK: %polly.access.cast.global.load1 = bitcast %struct.hoge* %global.load to i32* ; CHECK: %polly.access.global.load2 = getelementptr i32, i32* %polly.access.cast.global.load1, i64 2 ; CHECK: %polly.access.global.load2.cast = bitcast i32* %polly.access.global.load2 to double* -; CHECK: %polly.access.global.load2.load3 = load double, double* %polly.access.global.load2.cast +; CHECK: %polly.access.global.load2.load = load double, double* %polly.access.global.load2.cast target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll index 5410146..52a4f2f 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll @@ -25,12 +25,12 @@ ; ; CODEGEN: %.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0) ; CODEGEN: store i32 %.load, i32* %S.a.preload.s2a -; CODEGEN: %.load12 = load float, float* bitcast (i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) to float*) -; CODEGEN: store float %.load12, float* %S.b.preload.s2a +; CODEGEN: %.load1 = load float, float* bitcast (i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) to float*) +; CODEGEN: store float %.load1, float* %S.b.preload.s2a ; ; CODEGEN: polly.stmt.for.body: ; CODEGEN: %p_conv = sitofp i32 %.load to float -; CODEGEN: %p_add = fadd float %p_conv, %.load12 +; CODEGEN: %p_add = fadd float %p_conv, %.load1 ; CODEGEN: %p_conv1 = fptosi float %p_add to i32 %struct.anon = type { i32, float } diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll 
b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll index f6e3dc8..8f725fe 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll @@ -44,8 +44,8 @@ ; CODEGEN: polly.preload.begin: ; CODEGEN: %.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0) ; CODEGEN: store i32 %.load, i32* %S.a.preload.s2a -; CODEGEN: %.load12 = load float, float* bitcast (i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) to float*) -; CODEGEN: store float %.load12, float* %S.b.preload.s2a +; CODEGEN: %.load1 = load float, float* bitcast (i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) to float*) +; CODEGEN: store float %.load1, float* %S.b.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-DAG: %S.b.merge = phi float [ %S.b.final_reload, %polly.exiting ], [ %S.b, %do.cond ] diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll index 795fd28..3b6b24d 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll @@ -28,11 +28,9 @@ ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: -; CODEGEN: %U.load1 = load float, float* bitcast (i32* @U to float*) -; TODO FIXME There should not be a bitcast but either a real conversion or -; another load as one type is FP the other is not. 
-; CODEGEN: %0 = bitcast float %U.load1 to i32 -; CODEGEN: store float %U.load1, float* %U.f.preload.s2a +; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*) +; CODEGEN: %0 = bitcast float %U.load to i32 +; CODEGEN: store float %U.load, float* %U.f.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-NOT: merge = phi @@ -41,7 +39,7 @@ ; CODEGEN-NOT: final_reload ; ; CODEGEN: polly.stmt.for.body: -; CODEGEN: %p_conv = fptosi float %U.load1 to i32 +; CODEGEN: %p_conv = fptosi float %U.load to i32 ; CODEGEN: %p_add = add nsw i32 %0, %p_conv ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll index 96353dd..b208859 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll @@ -33,9 +33,9 @@ ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: -; CODEGEN: %U.load1 = load float, float* bitcast (i32* @U to float*) -; CODEGEN: %0 = bitcast float %U.load1 to i32 -; CODEGEN: store float %U.load1, float* %U.f.preload.s2a +; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*) +; CODEGEN: %0 = bitcast float %U.load to i32 +; CODEGEN: store float %U.load, float* %U.f.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-DAG: %U.f.merge = phi float [ %U.f.final_reload, %polly.exiting ], [ %U.f, %do.cond ] @@ -47,7 +47,7 @@ ; CODEGEN-DAG: %5 = bitcast float %U.i.final_reload to i32 ; ; CODEGEN: polly.stmt.do.body: -; CODEGEN: %p_conv = fptosi float %U.load1 to i32 +; CODEGEN: %p_conv = fptosi float %U.load to i32 ; CODEGEN: %p_add = add nsw i32 %0, %p_conv ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -- 2.7.4