From 65cd2c7a8015577fea15c861f41d2e4b5768961f Mon Sep 17 00:00:00 2001 From: Jean-Michel Gorius Date: Fri, 22 May 2020 21:26:46 +0200 Subject: [PATCH] Revert "[CodeGen] Add support for multiple memory operands in MachineInstr::mayAlias" This temporarily reverts commit 7019cea26dfef5882c96f278c32d0f9c49a5e516. It seems that, for some targets, there are instructions with a lot of memory operands (probably more than would be expected). This causes a lot of buildbots to time out and report failed builds. While investigations are ongoing to find out why this happens, revert the changes. --- llvm/lib/CodeGen/MachineInstr.cpp | 137 ++++++++++---------- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 5 - .../test/CodeGen/AArch64/merge-store-dependency.ll | 4 +- .../CodeGen/ARM/big-endian-neon-fp16-bitconv.ll | 4 +- .../CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll | 2 +- .../CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll | 2 +- llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll | 2 +- llvm/test/CodeGen/Thumb2/mve-float32regloops.ll | 8 +- llvm/test/CodeGen/Thumb2/mve-phireg.ll | 10 +- llvm/test/CodeGen/Thumb2/mve-vst3.ll | 2 +- .../Thumb2/umulo-128-legalisation-lowering.ll | 8 +- .../CodeGen/X86/instr-sched-multiple-memops.mir | 144 --------------------- llvm/test/CodeGen/X86/store_op_load_fold2.ll | 7 +- 13 files changed, 90 insertions(+), 245 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 67c2438..7afa61f 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1228,88 +1228,81 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; - if (memoperands_empty() || Other.memoperands_empty()) + // FIXME: Need to handle multiple memory operands to support all targets. 
+ if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; - auto HasAlias = [&](const MachineMemOperand &MMOa, - const MachineMemOperand &MMOb) { - // The following interface to AA is fashioned after DAGCombiner::isAlias - // and operates with MachineMemOperand offset with some important - // assumptions: - // - LLVM fundamentally assumes flat address spaces. - // - MachineOperand offset can *only* result from legalization and - // cannot affect queries other than the trivial case of overlap - // checking. - // - These offsets never wrap and never step outside - // of allocated objects. - // - There should never be any negative offsets here. - // - // FIXME: Modify API to hide this math from "user" - // Even before we go to AA we can reason locally about some - // memory objects. It can save compile time, and possibly catch some - // corner cases not currently covered. - - int64_t OffsetA = MMOa.getOffset(); - int64_t OffsetB = MMOb.getOffset(); - int64_t MinOffset = std::min(OffsetA, OffsetB); - - uint64_t WidthA = MMOa.getSize(); - uint64_t WidthB = MMOb.getSize(); - bool KnownWidthA = WidthA != MemoryLocation::UnknownSize; - bool KnownWidthB = WidthB != MemoryLocation::UnknownSize; - - const Value *ValA = MMOa.getValue(); - const Value *ValB = MMOb.getValue(); - bool SameVal = (ValA && ValB && (ValA == ValB)); - if (!SameVal) { - const PseudoSourceValue *PSVa = MMOa.getPseudoValue(); - const PseudoSourceValue *PSVb = MMOb.getPseudoValue(); - if (PSVa && ValB && !PSVa->mayAlias(&MFI)) - return false; - if (PSVb && ValA && !PSVb->mayAlias(&MFI)) - return false; - if (PSVa && PSVb && (PSVa == PSVb)) - SameVal = true; - } - - if (SameVal) { - if (!KnownWidthA || !KnownWidthB) - return true; - int64_t MaxOffset = std::max(OffsetA, OffsetB); - int64_t LowWidth = (MinOffset == OffsetA) ? 
WidthA : WidthB; - return (MinOffset + LowWidth > MaxOffset); - } + MachineMemOperand *MMOa = *memoperands_begin(); + MachineMemOperand *MMOb = *Other.memoperands_begin(); + + // The following interface to AA is fashioned after DAGCombiner::isAlias + // and operates with MachineMemOperand offset with some important + // assumptions: + // - LLVM fundamentally assumes flat address spaces. + // - MachineOperand offset can *only* result from legalization and + // cannot affect queries other than the trivial case of overlap + // checking. + // - These offsets never wrap and never step outside + // of allocated objects. + // - There should never be any negative offsets here. + // + // FIXME: Modify API to hide this math from "user" + // Even before we go to AA we can reason locally about some + // memory objects. It can save compile time, and possibly catch some + // corner cases not currently covered. + + int64_t OffsetA = MMOa->getOffset(); + int64_t OffsetB = MMOb->getOffset(); + int64_t MinOffset = std::min(OffsetA, OffsetB); + + uint64_t WidthA = MMOa->getSize(); + uint64_t WidthB = MMOb->getSize(); + bool KnownWidthA = WidthA != MemoryLocation::UnknownSize; + bool KnownWidthB = WidthB != MemoryLocation::UnknownSize; + + const Value *ValA = MMOa->getValue(); + const Value *ValB = MMOb->getValue(); + bool SameVal = (ValA && ValB && (ValA == ValB)); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && ValB && !PSVa->mayAlias(&MFI)) + return false; + if (PSVb && ValA && !PSVb->mayAlias(&MFI)) + return false; + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } - if (!AA) + if (SameVal) { + if (!KnownWidthA || !KnownWidthB) return true; + int64_t MaxOffset = std::max(OffsetA, OffsetB); + int64_t LowWidth = (MinOffset == OffsetA) ? 
WidthA : WidthB; + return (MinOffset + LowWidth > MaxOffset); + } - if (!ValA || !ValB) - return true; + if (!AA) + return true; - assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); - assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); + if (!ValA || !ValB) + return true; - int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset - : MemoryLocation::UnknownSize; - int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset - : MemoryLocation::UnknownSize; + assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); + assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - AliasResult AAResult = - AA->alias(MemoryLocation(ValA, OverlapA, - UseTBAA ? MMOa.getAAInfo() : AAMDNodes()), - MemoryLocation(ValB, OverlapB, - UseTBAA ? MMOb.getAAInfo() : AAMDNodes())); + int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset + : MemoryLocation::UnknownSize; + int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset + : MemoryLocation::UnknownSize; - return (AAResult != NoAlias); - }; + AliasResult AAResult = AA->alias( + MemoryLocation(ValA, OverlapA, + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, OverlapB, + UseTBAA ? 
MMOb->getAAInfo() : AAMDNodes())); - for (auto &&MMOa : memoperands()) { - for (auto &&MMOb : Other.memoperands()) { - if (HasAlias(*MMOa, *MMOb)) - return true; - } - } - return false; + return (AAResult != NoAlias); } /// hasOrderedMemoryRef - Return true if this instruction may have an ordered diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index fb3c9a7..edc5c4b 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -544,14 +544,9 @@ static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, unsigned Latency) { if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) { - LLVM_DEBUG(dbgs() << "Adding chain dependency\n from: " << *SUb->getInstr() - << " to: " << *SUa->getInstr()); SDep Dep(SUa, SDep::MayAliasMem); Dep.setLatency(Latency); SUb->addPred(Dep); - } else { - LLVM_DEBUG(dbgs() << "Not adding chain dependency\n from: " - << *SUb->getInstr() << " to: " << *SUa->getInstr()); } } diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll index d419629..5613db1 100644 --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg ; A53-NEXT: mov x19, x8 ; A53-NEXT: mov w0, w1 ; A53-NEXT: mov w9, #256 -; A53-NEXT: stp x2, x3, [x8, #32] -; A53-NEXT: mov x2, x8 ; A53-NEXT: str q0, [x19, #16]! 
; A53-NEXT: str w1, [x19] ; A53-NEXT: mov w1, #4 +; A53-NEXT: stp x2, x3, [x8, #32] +; A53-NEXT: mov x2, x8 ; A53-NEXT: str q0, [x8] ; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str wzr, [x8, #20] diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll index 693f335..9942d6d 100644 --- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll +++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll @@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) { ; CHECK-NEXT: vmov.32 r3, d16[1] ; CHECK-NEXT: vmov.32 r1, d16[0] ; CHECK-NEXT: subs r12, r12, #1 -; CHECK-NEXT: str r12, [r0, #12] ; CHECK-NEXT: sbcs r2, r2, #0 -; CHECK-NEXT: str r2, [r0, #8] ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: sbc r1, r1, #0 ; CHECK-NEXT: stm r0, {r1, r3} +; CHECK-NEXT: str r2, [r0, #8] +; CHECK-NEXT: str r12, [r0, #12] ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll index 1a2ad4d..88b772c 100644 --- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll +++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll @@ -9,7 +9,7 @@ ; CHECK: ********** MI Scheduling ********** ; We need second, post-ra scheduling to have VLDM instruction combined from single-loads ; CHECK: ********** MI Scheduling ********** -; CHECK: SU(1):{{.*}}VLDMDIA_UPD +; CHECK: VLDMDIA_UPD ; CHECK: rdefs left ; CHECK-NEXT: Latency : 6 ; CHECK: Successors: diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll index 3007630..c517f46 100644 --- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll +++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll @@ -5,7 +5,7 @@ ; We need second, post-ra scheduling to have VSTM instruction combined from single-stores ; CHECK: ********** MI Scheduling 
********** ; CHECK: schedule starting -; CHECK: SU(2):{{.*}}VSTMDIA_UPD +; CHECK: VSTMDIA_UPD ; CHECK: rdefs left ; CHECK-NEXT: Latency : 4 ; CHECK: Successors: diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll index f88bb47..5e9041c 100644 --- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll +++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll @@ -5,7 +5,7 @@ ; We need second, post-ra scheduling to have VSTM instruction combined from single-stores ; CHECK: ********** MI Scheduling ********** ; CHECK: schedule starting -; CHECK: SU(3):{{.*}}VSTMDIA +; CHECK: VSTMDIA ; CHECK: rdefs left ; CHECK-NEXT: Latency : 2 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 02bd955..111a587 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1092,7 +1092,6 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: ldrd lr, r10, [r12, #24] ; CHECK-NEXT: vstrb.8 q0, [r11], #16 ; CHECK-NEXT: vldrw.u32 q0, [r8], #32 -; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: vldrw.u32 q1, [r8, #-28] ; CHECK-NEXT: vmul.f32 q0, q0, r0 ; CHECK-NEXT: vldrw.u32 q6, [r8, #-24] @@ -1104,12 +1103,13 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: vfma.f32 q0, q4, r6 ; CHECK-NEXT: vldrw.u32 q3, [r8, #-8] ; CHECK-NEXT: vfma.f32 q0, q5, r5 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: vfma.f32 q0, q2, r3 ; CHECK-NEXT: vldrw.u32 q1, [r8, #-4] +; CHECK-NEXT: vfma.f32 q0, q2, r3 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vfma.f32 q0, q3, lr -; CHECK-NEXT: cmp r0, #16 +; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: vfma.f32 q0, q1, r10 +; CHECK-NEXT: cmp r0, #16 ; CHECK-NEXT: blo .LBB16_7 ; CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; 
CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll index 0fe26fb..e7d6a73 100644 --- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll +++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll @@ -168,14 +168,16 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: vmov q1, q4 ; CHECK-NEXT: vmov s1, r7 ; CHECK-NEXT: vmov.32 q1[1], r6 -; CHECK-NEXT: vmov.32 q5[0], r7 +; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: vmov.32 q1[2], r5 -; CHECK-NEXT: vmov s9, r4 +; CHECK-NEXT: vmov.32 q5[0], r7 ; CHECK-NEXT: vmov.32 q1[3], r4 -; CHECK-NEXT: vdup.32 q6, r7 +; CHECK-NEXT: strd r0, r10, [sp, #24] ; CHECK-NEXT: vstrw.32 q1, [sp, #76] ; CHECK-NEXT: vmov q1, q5 +; CHECK-NEXT: vmov s9, r4 ; CHECK-NEXT: vmov.32 q1[1], r7 +; CHECK-NEXT: vdup.32 q6, r7 ; CHECK-NEXT: vmov.f32 s2, s1 ; CHECK-NEXT: vmov.f32 s8, s0 ; CHECK-NEXT: vmov.32 q1[2], r6 @@ -183,7 +185,6 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: vmov q7, q6 ; CHECK-NEXT: vmov.f32 s10, s1 ; CHECK-NEXT: mov.w r8, #4 -; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: vmov.32 q1[3], r4 ; CHECK-NEXT: vmov.32 q3[0], r4 ; CHECK-NEXT: vmov.32 q7[1], r4 @@ -191,7 +192,6 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: vmov.f32 s11, s3 ; CHECK-NEXT: movs r1, #64 ; CHECK-NEXT: strh.w r8, [sp, #390] -; CHECK-NEXT: strd r0, r10, [sp, #24] ; CHECK-NEXT: vstrw.32 q0, [sp, #44] ; CHECK-NEXT: str r0, [r0] ; CHECK-NEXT: vstrw.32 q2, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll index 1f35029..52de7a4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) { ; CHECK-NEXT: vmov.f32 s9, s6 ; CHECK-NEXT: vmov.f32 s10, s0 ; CHECK-NEXT: vmov.f32 s11, s5 -; CHECK-NEXT: vstrw.32 q2, [r1] ; CHECK-NEXT: strd r2, r0, [r1, #16] +; CHECK-NEXT: vstrw.32 q2, [r1] ; CHECK-NEXT: pop {r4, pc} entry: %s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0 
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll index f57c922..ac1c814 100644 --- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll @@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; THUMBV7-NEXT: .pad #44 ; THUMBV7-NEXT: sub sp, #44 -; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill -; THUMBV7-NEXT: movs r0, #0 ; THUMBV7-NEXT: ldrd r4, r7, [sp, #88] ; THUMBV7-NEXT: mov r5, r3 -; THUMBV7-NEXT: strd r0, r0, [sp, #8] +; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill +; THUMBV7-NEXT: movs r0, #0 +; THUMBV7-NEXT: strd r4, r7, [sp] ; THUMBV7-NEXT: mov r1, r3 +; THUMBV7-NEXT: strd r0, r0, [sp, #8] ; THUMBV7-NEXT: mov r6, r2 ; THUMBV7-NEXT: mov r0, r2 ; THUMBV7-NEXT: movs r2, #0 ; THUMBV7-NEXT: movs r3, #0 -; THUMBV7-NEXT: strd r4, r7, [sp] ; THUMBV7-NEXT: bl __multi3 ; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill ; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill diff --git a/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir b/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir deleted file mode 100644 index 0259f42..0000000 --- a/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir +++ /dev/null @@ -1,144 +0,0 @@ -# RUN: llc -mtriple=i686-- -o - -run-pass=machine-scheduler -debug %s 2>&1 | FileCheck %s -# REQUIRES: asserts - ---- | - %struct.Macroblock.0.1.2.3.6.17 = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock.0.1.2.3.6.17*, %struct.Macroblock.0.1.2.3.6.17*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } - - define void @stepsystem(i32 %x) { - entry: - %0 = load i32, i32* undef, align 8 
- %inc = add i32 %x, 1 - store i32 %inc, i32* undef, align 8 - store <2 x double> , <2 x double>* undef, align 8 - ret void - } - - define void @dct_chroma() { - cond_true2732.preheader: - %tmp2666 = getelementptr %struct.Macroblock.0.1.2.3.6.17, %struct.Macroblock.0.1.2.3.6.17* null, i32 0, i32 13 - %tmp2667.us.us = load i64, i64* %tmp2666, align 4 - %tmp2670.us.us = load i64, i64* null, align 4 - %tmp2675.us.us = shl i64 %tmp2670.us.us, 0 - %tmp2675not.us.us = xor i64 %tmp2675.us.us, -1 - %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us - store i64 %tmp2676.us.us, i64* %tmp2666, align 4 - ret void - } - -... ---- -name: stepsystem -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -registers: - - { id: 0, class: gr32, preferred-register: '' } - - { id: 1, class: gr32, preferred-register: '' } - - { id: 2, class: gr32, preferred-register: '' } - - { id: 3, class: gr32, preferred-register: '' } - - { id: 4, class: gr32, preferred-register: '' } -liveins: [] -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: - - { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -stack: [] -callSites: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.0.entry: - %1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) - %1:gr32 = INC32r 
%1, implicit-def dead $eflags - MOV32mr undef %2:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `i32* undef`, align 8) - MOV32mi undef %3:gr32, 1, $noreg, 0, $noreg, -729988434 :: (store 4 into `<2 x double>* undef` + 12) - MOV32mi undef %4:gr32, 1, $noreg, 0, $noreg, -1568170194 :: (store 4 into `<2 x double>* undef` + 8, align 8) - RET 0 - -# CHECK-LABEL: stepsystem -# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: MOV32mi {{.*}} :: (store 4 {{.*}}) -# CHECK: Adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: MOV32mr {{.*}} :: (store 4 {{.*}}) -... ---- -name: dct_chroma -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -registers: - - { id: 0, class: gr32, preferred-register: '' } - - { id: 1, class: gr32, preferred-register: '' } - - { id: 2, class: gr32, preferred-register: '' } - - { id: 3, class: gr32, preferred-register: '' } - - { id: 4, class: gr32, preferred-register: '' } -liveins: [] -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -callSites: [] -constants: [] -machineFunctionInfo: {} -body: | - bb.0.cond_true2732.preheader: - %4:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i64* null`) - %2:gr32 = MOV32rm $noreg, 1, $noreg, 4, $noreg :: (load 4 from `i64* null` + 4) - %2:gr32 = NOT32r %2 - %4:gr32 = NOT32r %4 - %4:gr32 = AND32rm %4, $noreg, 1, $noreg, 356, $noreg, implicit-def dead $eflags :: (load 4 from 
%ir.tmp2666) - AND32mr $noreg, 1, $noreg, 360, $noreg, %2, implicit-def dead $eflags :: (store 4 into %ir.tmp2666 + 4), (load 4 from %ir.tmp2666 + 4) - MOV32mr $noreg, 1, $noreg, 356, $noreg, %4 :: (store 4 into %ir.tmp2666) - RET 0 - -# Chain dependencies should not be systematically added when at least one of -# the instructions has more than one memory operand. It should only be added -# where it would be needed. -# CHECK-LABEL: dct_chroma -# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mr {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}}) -# CHECK: Adding chain dependency{{[[:space:]]*}}from: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}}){{[[:space:]]*}}to: %{{.*}} = MOV32rm {{.*}} :: (load 4 {{.*}}) - diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll index 00db079..674b8d8 100644 --- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll +++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll @@ -17,12 +17,13 @@ cond_true2732.preheader: ; preds = %entry store i64 %tmp2676.us.us, i64* %tmp2666 ret i32 0 -; INTEL: and dword ptr [360], {{e..}} ; INTEL: and {{e..}}, dword ptr [356] -; INTEL: mov dword ptr [356], {{e..}} +; INTEL: and dword ptr [360], {{e..}} +; FIXME: mov dword ptr [356], {{e..}} +; The above line comes out as 'mov 360, eax', but when the register is ecx it works? -; ATT: andl %{{e..}}, 360 ; ATT: andl 356, %{{e..}} +; ATT: andl %{{e..}}, 360 ; ATT: movl %{{e..}}, 356 } -- 2.7.4