From 7c078fc035098853a7622f902d9ff80b1281d213 Mon Sep 17 00:00:00 2001 From: Geoff Berry Date: Tue, 29 Nov 2016 18:28:32 +0000 Subject: [PATCH] [AArch64] Fold spills of COPY of WZR/XZR Summary: In AArch64InstrInfo::foldMemoryOperandImpl, catch more cases where the COPY being spilled is copying from WZR/XZR, but the source register is not in the COPY destination register's regclass. For example, when spilling: %vreg0 = COPY %XZR ; %vreg0:GPR64common without this change, the code in TargetInstrInfo::foldMemoryOperand() and canFoldCopy() that normally handles cases like this would fail to optimize since %XZR is not in GPR64common. So the spill code generated would be: %vreg0 = COPY %XZR STR %vreg0 instead of the new code generated: STR %XZR Reviewers: qcolombet, MatzeB Subscribers: mcrosier, aemerson, t.p.northover, llvm-commits, rengolin Differential Revision: https://reviews.llvm.org/D26976 llvm-svn: 288176 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 25 +++++++++++++ llvm/test/CodeGen/AArch64/zero-reg.ll | 54 ++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 465137f..7a3f39d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2598,6 +2598,31 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( } } + // Handle the case where a WZR/XZR copy is being spilled but the destination + // register class doesn't contain WZR/XZR. 
For example: + // + // %vreg0 = COPY %XZR; GPR64common:%vreg0 + // + // In this case we can still safely fold away the COPY and generate the + // following spill code: + // + // STRXui %XZR, <fi#0> + // + if (MI.isFullCopy() && Ops.size() == 1 && Ops[0] == 0) { + MachineBasicBlock &MBB = *MI.getParent(); + const MachineOperand &SrcMO = MI.getOperand(1); + unsigned SrcReg = SrcMO.getReg(); + if (SrcReg == AArch64::WZR || SrcReg == AArch64::XZR) { + const TargetRegisterInfo &TRI = getRegisterInfo(); + const TargetRegisterClass &RC = SrcReg == AArch64::WZR + ? AArch64::GPR32RegClass + : AArch64::GPR64RegClass; + storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, + &RC, &TRI); + return &*--InsertPt; + } + } + // Cannot fold. return nullptr; } diff --git a/llvm/test/CodeGen/AArch64/zero-reg.ll b/llvm/test/CodeGen/AArch64/zero-reg.ll index 62b2ea3..9b8afad 100644 --- a/llvm/test/CodeGen/AArch64/zero-reg.ll +++ b/llvm/test/CodeGen/AArch64/zero-reg.ll @@ -28,3 +28,57 @@ define void @test_sp(i32 %val) { ret void ; CHECK: ret } + +declare i32 @bar() +declare i32 @baz() + +; Check that the spill of the zero value gets stored directly instead +; of being copied from wzr and then stored. 
+define i32 @test_zr_spill_copyprop1(i1 %c) { +; CHECK-LABEL: test_zr_spill_copyprop1: +entry: + br i1 %c, label %if.else, label %if.then + +if.else: +; CHECK: bl bar +; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]] + %call1 = tail call i32 @bar() + br label %if.end + +if.then: +; CHECK: bl baz +; CHECK-NEXT: str wzr, [sp, #[[SLOT]]] + %call2 = tail call i32 @baz() + br label %if.end + +if.end: + %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ] + call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind + ret i32 %x.0 +} + +; Similar to test_zr_spill_copyprop1, but with mis-matched register +; class between %x.0 and the 0 from %if.then. +define i32 @test_zr_spill_copyprop2(i1 %c) { +; CHECK-LABEL: test_zr_spill_copyprop2: +entry: + br i1 %c, label %if.else, label %if.then + +if.else: +; CHECK: bl bar +; CHECK-NEXT: str w0, [sp, #[[SLOT:[0-9]+]]] + %call1 = tail call i32 @bar() + br label %if.end + +if.then: +; CHECK: bl baz +; CHECK-NEXT: str wzr, [sp, #[[SLOT]]] + %call2 = tail call i32 @baz() + br label %if.end + +if.end: + %x.0 = phi i32 [ 0, %if.then ], [ %call1, %if.else ] + call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp}"() nounwind + %x.1 = add i32 %x.0, 1 + ret i32 %x.1 +} -- 2.7.4