From 8211cfb7c8bd1bedd5b3ed936d5b8f784c6bfd21 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 22 Apr 2020 12:07:15 +0100 Subject: [PATCH] [ARM] Don't shrink STM if it would cause an unknown base register store If a 16-bit thumb STM with writeback stores the base register but it isn't the first register in the list, then an unknown value is stored. The load/store optimizer knows this and generates a 32-bit STM without writeback instead, but thumb2 size reduction converts it into a 16-bit STM. Fix this by having thumb2 size reduction notice such STMs and leave them as they are. Differential Revision: https://reviews.llvm.org/D78493 --- llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 16 +++++- llvm/test/CodeGen/Thumb/stm-deprecated.ll | 88 ++++++++++++++++++++++++----- 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index d3e22b7..ae66159 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -516,13 +516,23 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, isLdStMul = true; break; } - case ARM::t2STMIA: - // If the base register is killed, we don't care what its value is after the - // instruction, so we can use an updating STMIA. + case ARM::t2STMIA: { + // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this + // if the base register is killed, as then it doesn't matter what its value + // is after the instruction. if (!MI->getOperand(0).isKill()) return false; + // If the base register is in the register list and isn't the lowest + // numbered register (i.e. it's in operand 4 onwards) then with writeback + // the stored value is unknown, so we can't convert to tSTMIA_UPD. + Register BaseReg = MI->getOperand(0).getReg(); + for (unsigned i = 4; i < MI->getNumOperands(); ++i) + if (MI->getOperand(i).getReg() == BaseReg) + return false; + break; + } case ARM::t2LDMIA_RET: { Register BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) diff --git a/llvm/test/CodeGen/Thumb/stm-deprecated.ll b/llvm/test/CodeGen/Thumb/stm-deprecated.ll index ffe2c0a..a3fd756 100644 --- a/llvm/test/CodeGen/Thumb/stm-deprecated.ll +++ b/llvm/test/CodeGen/Thumb/stm-deprecated.ll @@ -1,19 +1,81 @@ -; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv5e-linux-gnueabi -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T1 +; RUN: llc -mtriple=thumbv5e-linux-gnueabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T1 +; RUN: llc -mtriple=thumbv7m -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T2 +; RUN: llc -mtriple=thumbv7a -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T2 -%0 = type { %0*, %0*, i32 } +%struct1 = type { %struct1*, %struct1*, i32 } +%struct2 = type { i32, i32, i32* } -@x1 = external global %0, align 4 -@x2 = external global %0, align 4 +@x1 = external global %struct1, align 4 +@x2 = external global %struct1, align 4 -; CHECK: str r0, [r1] -; CHECK-NEXT: str r1, [r1, #4] +declare void @fn1(i32, i32) +declare void @fn2(%struct2*) + +; CHECK-LABEL: test1: +; CHECK-T1: str r0, [r1] +; CHECK-T1-NEXT: str r1, [r1, #4] +; CHECK-T2: strd r0, r1, [r1] ; CHECK-NOT: stm +define void @test1(i32 %unused, %struct1* %x) { + %first = getelementptr inbounds %struct1, %struct1* %x, i32 0, i32 0 + %second = getelementptr inbounds %struct1, %struct1* %x, i32 0, i32 1 + store %struct1* @x1, %struct1** %first + store %struct1* %x, %struct1** %second + ret void +} + +; CHECK-LABEL: test2: +; CHECK-T1: str r0, [r2] +; CHECK-T1-NEXT: str r1, [r2, #4] +; CHECK-T1-NEXT: str r2, [r2, #8] +; CHECK-T2: stm.w r2, {r0, r1, r2} +; CHECK-NOT: stm r2!, {r0, r1, r2} +define i32 @test2(i32 %a, i32 %b, %struct2* %p) { +entry: + %p1 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 0 + %p2 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 1 + %p3 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 2 + store i32 %a, i32* %p1, align 4 + store i32 %b, i32* %p2, align 4 + store i32* %p1, i32** %p3, align 4 + call void @fn1(i32 %a, i32 %b) + ret i32 0 +} + +; CHECK-LABEL: test3: +; CHECK-T1: str r0, [r2] +; CHECK-T1-NEXT: str r1, [r2, #4] +; CHECK-T1-NEXT: str r2, [r2, #8] +; CHECK-T2: stm.w r2, {r0, r1, r2} +; CHECK-NOT: stm r2!, {r0, r1, r2} +define i32 @test3(i32 %a, i32 %b, %struct2* %p) { +entry: + %p1 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 0 + %p2 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 1 + %p3 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 2 + store i32 %a, i32* %p1, align 4 + store i32 %b, i32* %p2, align 4 + store i32* %p1, i32** %p3, align 4 + %p4 = getelementptr inbounds %struct2, %struct2* %p, i32 1 + call void @fn2(%struct2* %p4) + ret i32 0 +} -define void @foo(i32 %unused, %0* %x) { - %first = getelementptr inbounds %0, %0* %x, i32 0, i32 0 - %second = getelementptr inbounds %0, %0* %x, i32 0, i32 1 - store %0* @x1, %0** %first - store %0* %x, %0** %second - unreachable +; FIXME: We should be using stm in both thumb1 and thumb2 +; CHECK-LABEL: test4: +; CHECK-T1: str r0, [r0] +; CHECK-T1-NEXT: str r1, [r0, #4] +; CHECK-T1-NEXT: str r2, [r0, #8] +; CHECK-T2: stm r0!, {r0, r1, r2} +define i32 @test4(%struct1* %p, %struct1* %q, i32 %a) { +entry: + %p1 = getelementptr inbounds %struct1, %struct1* %p, i32 0, i32 0 + %p2 = getelementptr inbounds %struct1, %struct1* %p, i32 0, i32 1 + %p3 = getelementptr inbounds %struct1, %struct1* %p, i32 0, i32 2 + store %struct1* %p, %struct1** %p1, align 4 + store %struct1* %q, %struct1** %p2, align 4 + store i32 %a, i32* %p3, align 4 + call void @fn1(i32 %a, i32 %a) + ret i32 0 } -- 2.7.4