From 29f553398fe653718b34b9b1c873df3008deafce Mon Sep 17 00:00:00 2001
From: Quentin Colombet
Date: Sat, 24 Jan 2015 01:25:54 +0000
Subject: [PATCH] [AArch64][LoadStoreOptimizer] Form LDPSW when possible.

This patch adds the missing LD[U]RSW variants to the load store optimizer, so
that we generate LDPSW when possible.

llvm-svn: 226978
---
 .../Target/AArch64/AArch64LoadStoreOptimizer.cpp | 16 +++-
 llvm/test/CodeGen/AArch64/arm64-ldp.ll           | 85 ++++++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8157981..b4d97ed 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -135,6 +135,8 @@ static bool isUnscaledLdst(unsigned Opc) {
     return true;
   case AArch64::LDURXi:
     return true;
+  case AArch64::LDURSWi:
+    return true;
   }
 }
 
@@ -173,6 +175,9 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
   case AArch64::LDRXui:
   case AArch64::LDURXi:
     return 8;
+  case AArch64::LDRSWui:
+  case AArch64::LDURSWi:
+    return 4;
   }
 }
 
@@ -210,6 +215,9 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
   case AArch64::LDRXui:
   case AArch64::LDURXi:
     return AArch64::LDPXi;
+  case AArch64::LDRSWui:
+  case AArch64::LDURSWi:
+    return AArch64::LDPSWi;
   }
 }
 
@@ -237,6 +245,8 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
     return AArch64::LDRWpre;
   case AArch64::LDRXui:
     return AArch64::LDRXpre;
+  case AArch64::LDRSWui:
+    return AArch64::LDRSWpre;
   }
 }
 
@@ -264,6 +274,8 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
     return AArch64::LDRWpost;
   case AArch64::LDRXui:
     return AArch64::LDRXpost;
+  case AArch64::LDRSWui:
+    return AArch64::LDRSWpost;
   }
 }
 
@@ -780,6 +792,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
     case AArch64::LDRQui:
     case AArch64::LDRXui:
     case AArch64::LDRWui:
+    case AArch64::LDRSWui:
     // do the unscaled versions as well
     case AArch64::STURSi:
     case AArch64::STURDi:
@@ -790,7 +803,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
     case AArch64::LDURDi:
    case AArch64::LDURQi:
     case AArch64::LDURWi:
-    case AArch64::LDURXi: {
+    case AArch64::LDURXi:
+    case AArch64::LDURSWi: {
       // If this is a volatile load/store, don't mess with it.
       if (MI->hasOrderedMemoryRef()) {
         ++MBBI;
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp.ll b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
index 5a98626..a9fa4ca 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
@@ -12,6 +12,18 @@ define i32 @ldp_int(i32* %p) nounwind {
   ret i32 %add
 }
 
+; CHECK: ldp_sext_int
+; CHECK: ldpsw
+define i64 @ldp_sext_int(i32* %p) nounwind {
+  %tmp = load i32* %p, align 4
+  %add.ptr = getelementptr inbounds i32* %p, i64 1
+  %tmp1 = load i32* %add.ptr, align 4
+  %sexttmp = sext i32 %tmp to i64
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %add = add nsw i64 %sexttmp1, %sexttmp
+  ret i64 %add
+}
+
 ; CHECK: ldp_long
 ; CHECK: ldp
 define i64 @ldp_long(i64* %p) nounwind {
@@ -56,6 +68,21 @@ define i32 @ldur_int(i32* %a) nounwind {
   ret i32 %tmp3
 }
 
+define i64 @ldur_sext_int(i32* %a) nounwind {
+; LDUR_CHK: ldur_sext_int
+; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i32 -1
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i32 -2
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @ldur_long(i64* %a) nounwind ssp {
 ; LDUR_CHK: ldur_long
 ; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
@@ -110,6 +137,22 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
   ret i64 %tmp3
 }
 
+define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyInSext
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -63
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i64 -64
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
 ; LDUR_CHK: pairUpBarelyOut
 ; LDUR_CHK-NOT: ldp
@@ -125,6 +168,23 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
   ret i64 %tmp3
 }
 
+define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyOutSext
+; LDUR_CHK-NOT: ldp
+; Don't be fragile about which loads or manipulations of the base register
+; are used---just check that there isn't an ldp before the add
+; LDUR_CHK: add
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -64
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i64 -65
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
 ; LDUR_CHK: pairUpNotAligned
 ; LDUR_CHK-NOT: ldp
@@ -147,3 +207,28 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
 }
+
+define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpNotAlignedSext
+; LDUR_CHK-NOT: ldp
+; LDUR_CHK: ldursw
+; LDUR_CHK-NEXT: ldursw
+; LDUR_CHK-NEXT: add
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -18
+  %bp1 = bitcast i32* %p1 to i8*
+  %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
+  %dp1 = bitcast i8* %bp1p1 to i32*
+  %tmp1 = load i32* %dp1, align 1
+
+  %p2 = getelementptr inbounds i32* %a, i64 -17
+  %bp2 = bitcast i32* %p2 to i8*
+  %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
+  %dp2 = bitcast i8* %bp2p1 to i32*
+  %tmp2 = load i32* %dp2, align 1
+
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
-- 
2.7.4
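
Illustrative sketch (not part of the commit): a minimal IR reproducer, written in the style of the arm64-ldp.ll tests above, showing the pairing the commit message describes. With this change, two adjacent sign-extending i32 loads should be merged into a single ldpsw instead of two separate ldrsw instructions. The function name is made up for this example, and the expected-assembly comment uses illustrative register assignments; the actual RUN-line flags of arm64-ldp.ll are not visible in this patch.

; Feed to llc (e.g. llc -march=arm64 -verify-machineinstrs) on a tree with this patch.
; Expected codegen after this patch (registers illustrative):
;   ldpsw x8, x9, [x0]
;   add   x0, x9, x8
;   ret
define i64 @sext_pair_example(i32* %p) nounwind {
  %lo = load i32* %p, align 4                      ; first 32-bit load
  %hip = getelementptr inbounds i32* %p, i64 1     ; adjacent element, offset #4
  %hi = load i32* %hip, align 4                    ; second 32-bit load
  %lo64 = sext i32 %lo to i64                      ; both results are sign-extended,
  %hi64 = sext i32 %hi to i64                      ; so the loads select as LDRSW/LDURSW
  %sum = add nsw i64 %hi64, %lo64
  ret i64 %sum
}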