From 61f81f9637cf81ee25d941b09c41a372d9387f3e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 8 Nov 2017 22:26:39 +0000 Subject: [PATCH] [X86] Preserve memory refs when folding loads into divides. This is similar to what we already do for multiplies. Without this we can't unfold and hoist an invariant load. llvm-svn: 317732 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 6 +++++- llvm/test/CodeGen/X86/hoist-invariant-load.ll | 30 ++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 21fee9a..09954324 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2885,11 +2885,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) { if (foldedLoad) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = + MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + // Record the mem-refs + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N1)->getMemOperand(); + CNode->setMemRefs(MemOp, MemOp + 1); } else { InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll index e7929c9..a8b4e0a 100644 --- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll +++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll @@ -1,10 +1,12 @@ ; REQUIRES: asserts -; RUN: llc -mcpu=haswell < %s -stats -O2 2>&1 | grep "4 machinelicm.*hoisted" +; RUN: llc -mcpu=haswell < %s -stats -O2 2>&1 | grep "7 machinelicm.*hoisted" ; For test: ; 2 invariant loads, 1 for OBJC_SELECTOR_REFERENCES_ ; and 1 for objc_msgSend from the GOT ; For test_multi_def: ; 2 invariant load (full multiply, both loads should be hoisted.) +; For test_div_def: +; 2 invariant load (full divide, both loads should be hoisted.) 1 additional instruction for a zeroing edx that gets hoisted and then rematerialized. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.7.2" @@ -60,4 +62,30 @@ exit: ret void } +define void @test_div_def(i32* dereferenceable(8) %x1, + i32* dereferenceable(8) %x2, + i32* %y, i32 %count) nounwind { +entry: + br label %for.body + +for.check: + %inc = add nsw i32 %i, 1 + %done = icmp sge i32 %inc, %count + br i1 %done, label %exit, label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.check ] + %x1_load = load i32, i32* %x1, align 8, !invariant.load !0 + %x2_load = load i32, i32* %x2, align 8, !invariant.load !0 + %x_quot = udiv i32 %x1_load, %x2_load + %y_elem = getelementptr inbounds i32, i32* %y, i32 %i + %y_load = load i32, i32* %y_elem, align 8 + %y_plus = add i32 %x_quot, %y_load + store i32 %y_plus, i32* %y_elem, align 8 + br label %for.check + +exit: + ret void +} + !0 = !{} -- 2.7.4