From 0c93879d9621057c02712f700e79d4afa4a9bf8f Mon Sep 17 00:00:00 2001
From: Zain Jaffal
Date: Fri, 19 May 2023 15:24:30 +0100
Subject: [PATCH] [AArch64] merge scaled and unscaled zero narrow stores.

This patch fixes a crash when scaled and unscaled zero stores are merged.

Differential Revision: https://reviews.llvm.org/D150963
---
 .../Target/AArch64/AArch64LoadStoreOptimizer.cpp   |  34 ++--
 .../test/CodeGen/AArch64/str-narrow-zero-merge.mir | 172 +++++++++++++++++++++
 2 files changed, 195 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir

diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 4c13c09..41af552 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -734,8 +734,11 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
     NextI = next_nodbg(NextI, E);
 
   unsigned Opc = I->getOpcode();
+  unsigned MergeMIOpc = MergeMI->getOpcode();
   bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
-  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
+  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
+  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
+  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
 
   bool MergeForward = Flags.getMergeForward();
   // Insert our new paired instruction after whichever of the paired
@@ -748,18 +751,27 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                : AArch64InstrInfo::getLdStBaseOp(*I);
 
   // Which register is Rt and which is Rt2 depends on the offset order.
-  MachineInstr *RtMI;
-  if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
-      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
-    RtMI = &*MergeMI;
+  int64_t IOffsetInBytes =
+      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
+  int64_t MIOffsetInBytes =
+      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
+      MergeMIOffsetStride;
+  // Select final offset based on the offset order.
+  int64_t OffsetImm;
+  if (IOffsetInBytes > MIOffsetInBytes)
+    OffsetImm = MIOffsetInBytes;
   else
-    RtMI = &*I;
+    OffsetImm = IOffsetInBytes;
 
-  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
-  // Change the scaled offset from small to large type.
-  if (IsScaled) {
-    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
-    OffsetImm /= 2;
+  int NewOpcode = getMatchingWideOpcode(Opc);
+  bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);
+
+  // Adjust final offset if the result opcode is a scaled store.
+  if (FinalIsScaled) {
+    int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1;
+    assert(((OffsetImm % NewOffsetStride) == 0) &&
+           "Offset should be a multiple of the store memory scale");
+    OffsetImm = OffsetImm / NewOffsetStride;
   }
 
   // Construct the new instruction.
diff --git a/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir
new file mode 100644
index 0000000..e995c40
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir
@@ -0,0 +1,172 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+
+
+---
+name: merge_unscaled_str_with_unscaled_str_8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_unscaled_str_8
+    ; CHECK: STURHHi $wzr, $x0, 4 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STURBBi $wzr, $x0, 4 :: (store (s8))
+    STURBBi $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_scaled_str_8
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_scaled_str_8
+    ; CHECK: STRHHui $wzr, $x0, 2 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STRBBui $wzr, $x0, 4 :: (store (s8))
+    STRBBui $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_unscaled_8
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_8
+    ; CHECK: STRBBui $wzr, $x0, 4 :: (store (s8))
+    ; CHECK-NEXT: STURBBi $wzr, $x0, 5 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STRBBui $wzr, $x0, 4 :: (store (s8))
+    STURBBi $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+---
+name: merge_unscaled_str_with_scaled_8
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_8
+    ; CHECK: STURBBi $wzr, $x0, 4 :: (store (s8))
+    ; CHECK-NEXT: STRBBui $wzr, $x0, 5 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STURBBi $wzr, $x0, 4 :: (store (s8))
+    STRBBui $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+---
+name: merge_unscaled_str_with_unscaled_str_16
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_unscaled_str_16
+    ; CHECK: STURWi $wzr, $x0, 4 :: (store (s16))
+    ; CHECK-NEXT: RET undef $lr
+    STURHHi $wzr, $x0, 4 :: (store (s16))
+    STURHHi $wzr, $x0, 6 :: (store (s16))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_scaled_str_16
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_scaled_str_16
+    ; CHECK: STRWui $wzr, $x0, 2 :: (store (s16))
+    ; CHECK-NEXT: RET undef $lr
+    STRHHui $wzr, $x0, 4 :: (store (s16))
+    STRHHui $wzr, $x0, 5 :: (store (s16))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_unscaled_16
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_16
+    ; CHECK: STRHHui $wzr, $x0, 2 :: (store (s16))
+    ; CHECK-NEXT: STURHHi $wzr, $x0, 6 :: (store (s16))
+    ; CHECK-NEXT: RET undef $lr
+    STRHHui $wzr, $x0, 2 :: (store (s16))
+    STURHHi $wzr, $x0, 6 :: (store (s16))
+    RET undef $lr
+...
+---
+name: merge_unscaled_str_with_scaled_16
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_16
+    ; CHECK: STURHHi $wzr, $x0, 4 :: (store (s16))
+    ; CHECK-NEXT: STRHHui $wzr, $x0, 3 :: (store (s16))
+    ; CHECK-NEXT: RET undef $lr
+    STURHHi $wzr, $x0, 4 :: (store (s16))
+    STRHHui $wzr, $x0, 3 :: (store (s16))
+    RET undef $lr
+...
+---
+name: merge_unscaled_str_with_unscaled_32
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_unscaled_32
+    ; CHECK: STURXi $xzr, $x0, 4 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STURWi $wzr, $x0, 4 :: (store (s32))
+    STURWi $wzr, $x0, 8 :: (store (s32))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_scaled_32
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_scaled_32
+    ; CHECK: STRXui $xzr, $x0, 1 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STRWui $wzr, $x0, 2 :: (store (s32))
+    STRWui $wzr, $x0, 3 :: (store (s32))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_unscaled_32
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_32
+    ; CHECK: STRXui $xzr, $x0, 1 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STRWui $wzr, $x0, 2 :: (store (s32))
+    STURWi $wzr, $x0, 12 :: (store (s32))
+    RET undef $lr
+...
+---
+name: merge_unscaled_str_with_scaled_32
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_32
+    ; CHECK: STURXi $xzr, $x0, 8 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STURWi $wzr, $x0, 8 :: (store (s32))
+    STRWui $wzr, $x0, 3 :: (store (s32))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_unscaled_32_lower_address_second
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_32_lower_address_second
+    ; CHECK: STRXui $xzr, $x0, 1 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STRWui $wzr, $x0, 3 :: (store (s32))
+    STURWi $wzr, $x0, 8 :: (store (s32))
+    RET undef $lr
+...
+---
+name: merge_unscaled_str_with_scaled_32_lower_address_second
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_32_lower_address_second
+    ; CHECK: STURXi $xzr, $x0, 4 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STURWi $wzr, $x0, 8 :: (store (s32))
+    STRWui $wzr, $x0, 1 :: (store (s32))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_unscaled_32_negative_address
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_32_negative_address
+    ; CHECK: STPWi $wzr, $wzr, $x0, -1 :: (store (s32))
+    ; CHECK-NEXT: RET undef $lr
+    STRWui $wzr, $x0, 0 :: (store (s32))
+    STURWi $wzr, $x0, -4 :: (store (s32))
+    RET undef $lr
-- 
2.7.4
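For reference, the arithmetic the AArch64LoadStoreOptimizer.cpp hunk introduces can be illustrated in
isolation: each store's immediate is first normalized to a byte offset using that instruction's own
memory scale, the lower of the two byte offsets becomes the merged store's offset, and the result is
divided by the wide opcode's scale when the wide form is scaled. The sketch below is a standalone
illustration, not part of the patch; the NarrowZeroStore struct and mergedOffset helper are
hypothetical names, and the input values mirror the merge_scaled_str_with_unscaled_32 test above.

// Standalone sketch of the offset normalization performed when merging a
// scaled and an unscaled narrow zero store (hypothetical helper, not LLVM API).
#include <cassert>
#include <cstdint>
#include <cstdio>

struct NarrowZeroStore {
  int64_t Imm; // immediate as written in the instruction
  int Scale;   // memory scale for scaled forms, 1 for unscaled (STUR*) forms
};

// Returns the merged wide store's immediate, expressed in the wide opcode's units.
static int64_t mergedOffset(NarrowZeroStore I, NarrowZeroStore MergeMI,
                            int WideScale /* 1 if the wide opcode is unscaled */) {
  int64_t IBytes = I.Imm * I.Scale;             // normalize both offsets to bytes
  int64_t MBytes = MergeMI.Imm * MergeMI.Scale;
  int64_t OffsetBytes = IBytes < MBytes ? IBytes : MBytes; // lower address wins
  assert(OffsetBytes % WideScale == 0 &&
         "offset must be a multiple of the wide store's memory scale");
  return OffsetBytes / WideScale; // rescale for the wide opcode
}

int main() {
  // STRWui #2 (scale 4 -> byte 8) merged with STURWi #12 (byte 12) becomes a
  // 64-bit scaled store (scale 8): 8 / 8 == 1.
  NarrowZeroStore Scaled{2, 4}, Unscaled{12, 1};
  std::printf("merged imm = %lld\n",
              (long long)mergedOffset(Scaled, Unscaled, /*WideScale=*/8));
  return 0;
}

Run on those inputs it prints "merged imm = 1", matching the STRXui immediate in that test's CHECK line.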