From ed797a30497edd9010d45125f0fc949b7ded651e Mon Sep 17 00:00:00 2001 From: Stefan Maksimovic Date: Thu, 22 Feb 2018 13:40:42 +0000 Subject: [PATCH] [mips] Generate memory dependencies for byVal arguments There were no memory dependencies made between stores generated when lowering formal arguments and loads generated when call lowering byVal arguments which made the Post-RA scheduler place a load before a matching store. Make the fixed object stored to mutable so that the load instructions can have their memory dependencies added. Set the frame object as isAliased which clears the underlying objects vector in ScheduleDAGInstrs::buildSchedGraph(). This results in addition of all stores as dependencies for loads. This problem appeared when passing a byVal parameter coupled with a fastcc function call. Differential Revision: https://reviews.llvm.org/D37515 llvm-svn: 325782 --- llvm/lib/Target/Mips/MipsISelLowering.cpp | 7 ++++++- llvm/test/CodeGen/Mips/fastcc_byval.ll | 27 +++++++++++++++++++++++++++ llvm/test/CodeGen/Mips/o32_cc_byval.ll | 3 +-- 3 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/fastcc_byval.ll diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index f739f9a..99abbff 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -4063,7 +4063,12 @@ void MipsTargetLowering::copyByValRegs( // Create frame object. EVT PtrTy = getPointerTy(DAG.getDataLayout()); - int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, true); + // Make the fixed object stored to mutable so that the load instructions + // referencing it have their memory dependencies added. + // Set the frame object as isAliased which clears the underlying objects + // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all + // stores as dependencies for loads referencing this fixed object. 
+ int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); InVals.push_back(FIN); diff --git a/llvm/test/CodeGen/Mips/fastcc_byval.ll b/llvm/test/CodeGen/Mips/fastcc_byval.ll new file mode 100644 index 0000000..4a55ac7 --- /dev/null +++ b/llvm/test/CodeGen/Mips/fastcc_byval.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -O3 -relocation-model=pic < %s | FileCheck %s + +; Test that a load comes after a store to the same memory location when passing +; a byVal parameter to a function which has a fastcc function call + +%struct.str = type { i32, i32, [3 x i32*] } + +declare fastcc void @_Z1F3str(%struct.str* noalias nocapture sret %agg.result, %struct.str* byval nocapture readonly align 4 %s) + +define i32 @_Z1g3str(%struct.str* byval nocapture readonly align 4 %s) { +; CHECK-LABEL: _Z1g3str: +; CHECK: sw $7, [[OFFSET:[0-9]+]]($sp) +; CHECK: lw ${{[0-9]+}}, [[OFFSET]]($sp) +entry: + %ref.tmp = alloca %struct.str, align 4 + %0 = bitcast %struct.str* %ref.tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %0) + call fastcc void @_Z1F3str(%struct.str* nonnull sret %ref.tmp, %struct.str* byval nonnull align 4 %s) + %cl.sroa.3.0..sroa_idx2 = getelementptr inbounds %struct.str, %struct.str* %ref.tmp, i32 0, i32 1 + %cl.sroa.3.0.copyload = load i32, i32* %cl.sroa.3.0..sroa_idx2, align 4 + call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %0) + ret i32 %cl.sroa.3.0.copyload +} + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) diff --git a/llvm/test/CodeGen/Mips/o32_cc_byval.ll b/llvm/test/CodeGen/Mips/o32_cc_byval.ll index 634d02d..3f267ad 100644 --- a/llvm/test/CodeGen/Mips/o32_cc_byval.ll +++ b/llvm/test/CodeGen/Mips/o32_cc_byval.ll @@ -243,10 +243,9 @@ define void @f5(i64 %a0, %struct.S4* nocapture byval %a1) nounwind { ; CHECK-NEXT: lw $7, 52($sp) ; CHECK-NEXT: lw $6, 48($sp) ; CHECK-NEXT: lw $5, 44($sp) -; 
CHECK-NEXT: lw $4, 40($sp) ; CHECK-NEXT: lw $25, %call16(f6)($gp) ; CHECK-NEXT: jalr $25 -; CHECK-NEXT: nop +; CHECK-NEXT: lw $4, 40($sp) ; CHECK-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; CHECK-NEXT: jr $ra ; CHECK-NEXT: addiu $sp, $sp, 32 -- 2.7.4