[SystemZ] Rein back the use of block operations

author Richard Sandiford <rsandifo@linux.vnet.ibm.com>

Fri, 27 Sep 2013 15:29:20 +0000 (15:29 +0000)

committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>

Fri, 27 Sep 2013 15:29:20 +0000 (15:29 +0000)
author Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Fri, 27 Sep 2013 15:29:20 +0000 (15:29 +0000)
committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Fri, 27 Sep 2013 15:29:20 +0000 (15:29 +0000)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

index a97449a..da93590 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -290,6 +290,17 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
    SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                                uint64_t UpperVal, uint64_t LowerVal);
  
+  // Return true if Load and Store are loads and stores of the same size
+  // and are guaranteed not to overlap.  Such operations can be implemented
+  // using block (SS-format) instructions.
+  //
+  // Partial overlap would lead to incorrect code, since the block operations
+  // are logically bytewise, even though they have a fast path for the
+  // non-overlapping case.  We also need to avoid full overlap (i.e. two
+  // addresses that might be equal at run time) because although that case
+  // would be handled correctly, it might be implemented by millicode.
+  bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;
+
    // N is a (store (load Y), X) pattern.  Return true if it can use an MVC
    // from Y to X.
    bool storeLoadCanUseMVC(SDNode *N) const;
@@ -938,13 +949,8 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
    return Or.getNode();
  }
  
-// Return true if Load and Store:
-// - are loads and stores of the same size;
-// - do not partially overlap; and
-// - can be decomposed into what are logically individual character accesses
-//   without changing the semantics.
-static bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load,
-                                 AliasAnalysis *AA) {
+bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
+                                               LoadSDNode *Load) const {
    // Check that the two memory operands have the same size.
    if (Load->getMemoryVT() != Store->getMemoryVT())
      return false;
@@ -957,19 +963,19 @@ static bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load,
    if (Load->isInvariant())
      return true;
  
-  // If both operands are aligned, they must be equal or not overlap.
-  uint64_t Size = Load->getMemoryVT().getStoreSize();
-  if (Load->getAlignment() >= Size && Store->getAlignment() >= Size)
-    return true;
-
    // Otherwise we need to check whether there's an alias.
    const Value *V1 = Load->getSrcValue();
    const Value *V2 = Store->getSrcValue();
    if (!V1 || !V2)
      return false;
  
+  // Reject equality.
+  uint64_t Size = Load->getMemoryVT().getStoreSize();
    int64_t End1 = Load->getSrcValueOffset() + Size;
    int64_t End2 = Store->getSrcValueOffset() + Size;
+  if (V1 == V2 && End1 == End2)
+    return false;
+
    return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
                      AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
  }
@@ -990,7 +996,7 @@ bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
        return false;
    }
  
-  return canUseBlockOperation(Store, Load, AA);
+  return canUseBlockOperation(Store, Load);
  }
  
  bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
@@ -998,11 +1004,7 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
    StoreSDNode *StoreA = cast<StoreSDNode>(N);
    LoadSDNode *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
    LoadSDNode *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
-  if (LoadA->isVolatile() ||
-      LoadA->getMemoryVT() != StoreA->getMemoryVT() ||
-      LoadA->getBasePtr() != StoreA->getBasePtr())
-    return false;
-  return canUseBlockOperation(StoreA, LoadB, AA);
+  return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
  }
  
  SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp

index 6d19bdd..b77557e 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -674,10 +674,14 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    //
    // Although MVC is in practice a fast choice in these cases, it is still
    // logically a bytewise copy.  This means that we cannot use it if the
-  // load or store is volatile.  It also means that the transformation is
-  // not valid in cases where the two memories partially overlap; however,
-  // that is not a problem here, because we know that one of the memories
-  // is a full frame index.
+  // load or store is volatile.  We also wouldn't be able to use MVC if
+  // the two memories partially overlap, but that case cannot occur here,
+  // because we know that one of the memories is a full frame index.
+  //
+  // For performance reasons, we also want to avoid using MVC if the addresses
+  // might be equal.  We don't worry about that case here, because spill slot
+  // coloring happens later, and because we have special code to remove
+  // MVCs that turn out to be redundant.
    if (OpNum == 0 && MI->hasOneMemOperand()) {
      MachineMemOperand *MMO = *MI->memoperands_begin();
      if (MMO->getSize() == Size && !MMO->isVolatile()) {
diff --git a/llvm/test/CodeGen/SystemZ/and-08.ll b/llvm/test/CodeGen/SystemZ/and-08.ll

index f2ab669..519edc6 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/and-08.ll
+++ b/llvm/test/CodeGen/SystemZ/and-08.ll
@@ -2,8 +2,10 @@
  ;
  ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
  
-@g1 = global i8 1
-@g2 = global i16 2
+@g1src = global i8 1
+@g1dst = global i8 1
+@g2src = global i16 2
+@g2dst = global i16 2
  
  ; Test the simple i8 case.
  define void @f1(i8 *%ptr1) {
@@ -239,11 +241,12 @@ define void @f16(i64 *%ptr1) {
    ret void
  }
  
-; Test that NC is used for aligned loads and stores, even if there is
-; no way of telling whether they alias.
+; Test that NC is not used for aligned loads and stores if there is
+; no way of telling whether they alias.  We don't want to use NC in
+; cases where the addresses could be equal.
  define void @f17(i64 *%ptr1, i64 *%ptr2) {
  ; CHECK-LABEL: f17:
-; CHECK: nc 0(8,%r3), 0(%r2)
+; CHECK-NOT: nc
  ; CHECK: br %r14
    %val = load i64 *%ptr1
    %old = load i64 *%ptr2
@@ -306,58 +309,34 @@ define void @f21(i64 %base) {
  ; Test that we can use NC for global addresses for i8.
  define void @f22(i8 *%ptr) {
  ; CHECK-LABEL: f22:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: nc 0(1,%r2), 0([[REG]])
-; CHECK: br %r14
-  %val = load i8 *@g1
-  %old = load i8 *%ptr
-  %and = and i8 %val, %old
-  store i8 %and, i8 *%ptr
-  ret void
-}
-
-; ...and again with the global on the store.
-define void @f23(i8 *%ptr) {
-; CHECK-LABEL: f23:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: nc 0(1,[[REG]]), 0(%r2)
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
+; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
  ; CHECK: br %r14
-  %val = load i8 *%ptr
-  %old = load i8 *@g1
+  %val = load i8 *@g1src
+  %old = load i8 *@g1dst
    %and = and i8 %val, %old
-  store i8 %and, i8 *@g1
+  store i8 %and, i8 *@g1dst
    ret void
  }
  
  ; Test that we use NC even where LHRL and STHRL are available.
-define void @f24(i16 *%ptr) {
-; CHECK-LABEL: f24:
-; CHECK: larl [[REG:%r[0-5]]], g2
-; CHECK: nc 0(2,%r2), 0([[REG]])
-; CHECK: br %r14
-  %val = load i16 *@g2
-  %old = load i16 *%ptr
-  %and = and i16 %val, %old
-  store i16 %and, i16 *%ptr
-  ret void
-}
-
-; ...likewise on the other side.
-define void @f25(i16 *%ptr) {
-; CHECK-LABEL: f25:
-; CHECK: larl [[REG:%r[0-5]]], g2
-; CHECK: nc 0(2,[[REG]]), 0(%r2)
+define void @f23(i16 *%ptr) {
+; CHECK-LABEL: f23:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
+; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
  ; CHECK: br %r14
-  %val = load i16 *%ptr
-  %old = load i16 *@g2
+  %val = load i16 *@g2src
+  %old = load i16 *@g2dst
    %and = and i16 %val, %old
-  store i16 %and, i16 *@g2
+  store i16 %and, i16 *@g2dst
    ret void
  }
  
  ; Test a case where offset disambiguation is enough.
-define void @f26(i64 *%ptr1) {
-; CHECK-LABEL: f26:
+define void @f24(i64 *%ptr1) {
+; CHECK-LABEL: f24:
  ; CHECK: nc 8(8,%r2), 0(%r2)
  ; CHECK: br %r14
    %ptr2 = getelementptr i64 *%ptr1, i64 1
@@ -369,8 +348,8 @@ define void @f26(i64 *%ptr1) {
  }
  
  ; Test a case where TBAA tells us there is no alias.
-define void @f27(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f27:
+define void @f25(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f25:
  ; CHECK: nc 0(8,%r3), 0(%r2)
  ; CHECK: br %r14
    %val = load i64 *%ptr1, align 2, !tbaa !1
@@ -381,8 +360,8 @@ define void @f27(i64 *%ptr1, i64 *%ptr2) {
  }
  
  ; Test a case where TBAA information is present but doesn't help.
-define void @f28(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f28:
+define void @f26(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f26:
  ; CHECK-NOT: nc
  ; CHECK: br %r14
    %val = load i64 *%ptr1, align 2, !tbaa !1
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-02.ll b/llvm/test/CodeGen/SystemZ/fp-move-02.ll

index b4f0428..6f9da9a 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/fp-move-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-02.ll
@@ -63,11 +63,11 @@ define double @f5(i64 %a) {
  
  ; Test 128-bit moves from GPRs to FPRs.  i128 isn't a legitimate type,
  ; so this goes through memory.
-; FIXME: it would be better to use one MVC here.
  define void @f6(fp128 *%a, i128 *%b) {
  ; CHECK-LABEL: f6:
  ; CHECK: lg
-; CHECK: mvc
+; CHECK: lg
+; CHECK: stg
  ; CHECK: stg
  ; CHECK: br %r14
    %val = load i128 *%b
diff --git a/llvm/test/CodeGen/SystemZ/memcpy-02.ll b/llvm/test/CodeGen/SystemZ/memcpy-02.ll

index 83b2cd8..5cc432a 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/memcpy-02.ll
+++ b/llvm/test/CodeGen/SystemZ/memcpy-02.ll
@@ -2,11 +2,14 @@
  ;
  ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
  
-@g1 = global i8 1
-@g2 = global i16 2
+@g1src = global i8 1
+@g1dst = global i8 1
+@g2src = global i16 2
+@g2dst = global i16 2
  @g3 = global i32 3
  @g4 = global i64 4
-@g5 = external global fp128, align 16
+@g5src = external global fp128, align 16
+@g5dst = external global fp128, align 16
  
  ; Test the simple i8 case.
  define void @f1(i8 *%ptr1) {
@@ -237,18 +240,19 @@ define void @f19(i64 *%ptr1) {
    ret void
  }
  
-; Test that MVC is used for aligned loads and stores, even if there is
-; no way of telling whether they alias.
+; Test that MVC is not used for aligned loads and stores if there is
+; no way of telling whether they alias.  We don't want to use MVC in
+; cases where the addresses could be equal.
  define void @f20(i64 *%ptr1, i64 *%ptr2) {
  ; CHECK-LABEL: f20:
-; CHECK: mvc 0(8,%r3), 0(%r2)
+; CHECK-NOT: mvc
  ; CHECK: br %r14
    %val = load i64 *%ptr1
    store i64 %val, i64 *%ptr2
    ret void
  }
  
-; ...but if the loads aren't aligned, we can't be sure.
+; ...and again for unaligned loads and stores.
  define void @f21(i64 *%ptr1, i64 *%ptr2) {
  ; CHECK-LABEL: f21:
  ; CHECK-NOT: mvc
@@ -274,50 +278,29 @@ define void @f22(i64 %base) {
  ; Test that we can use MVC for global addresses for i8.
  define void @f23(i8 *%ptr) {
  ; CHECK-LABEL: f23:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: mvc 0(1,%r2), 0([[REG]])
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
+; CHECK: mvc 0(1,[[DST]]), 0([[SRC]])
  ; CHECK: br %r14
-  %val = load i8 *@g1
-  store i8 %val, i8 *%ptr
+  %val = load i8 *@g1src
+  store i8 %val, i8 *@g1dst
    ret void
  }
  
-; ...and again with the global on the store.
-define void @f24(i8 *%ptr) {
+; Test that we use LHRL and STHRL for i16.
+define void @f24(i16 *%ptr) {
  ; CHECK-LABEL: f24:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: mvc 0(1,[[REG]]), 0(%r2)
-; CHECK: br %r14
-  %val = load i8 *%ptr
-  store i8 %val, i8 *@g1
-  ret void
-}
-
-; Test that we use LHRL for i16.
-define void @f25(i16 *%ptr) {
-; CHECK-LABEL: f25:
-; CHECK: lhrl [[REG:%r[0-5]]], g2
-; CHECK: sth [[REG]], 0(%r2)
+; CHECK: lhrl [[REG:%r[0-5]]], g2src
+; CHECK: sthrl [[REG]], g2dst
  ; CHECK: br %r14
-  %val = load i16 *@g2
-  store i16 %val, i16 *%ptr
-  ret void
-}
-
-; ...likewise STHRL.
-define void @f26(i16 *%ptr) {
-; CHECK-LABEL: f26:
-; CHECK: lh [[REG:%r[0-5]]], 0(%r2)
-; CHECK: sthrl [[REG]], g2
-; CHECK: br %r14
-  %val = load i16 *%ptr
-  store i16 %val, i16 *@g2
+  %val = load i16 *@g2src
+  store i16 %val, i16 *@g2dst
    ret void
  }
  
  ; Test that we use LRL for i32.
-define void @f27(i32 *%ptr) {
-; CHECK-LABEL: f27:
+define void @f25(i32 *%ptr) {
+; CHECK-LABEL: f25:
  ; CHECK: lrl [[REG:%r[0-5]]], g3
  ; CHECK: st [[REG]], 0(%r2)
  ; CHECK: br %r14
@@ -327,8 +310,8 @@ define void @f27(i32 *%ptr) {
  }
  
  ; ...likewise STRL.
-define void @f28(i32 *%ptr) {
-; CHECK-LABEL: f28:
+define void @f26(i32 *%ptr) {
+; CHECK-LABEL: f26:
  ; CHECK: l [[REG:%r[0-5]]], 0(%r2)
  ; CHECK: strl [[REG]], g3
  ; CHECK: br %r14
@@ -338,8 +321,8 @@ define void @f28(i32 *%ptr) {
  }
  
  ; Test that we use LGRL for i64.
-define void @f29(i64 *%ptr) {
-; CHECK-LABEL: f29:
+define void @f27(i64 *%ptr) {
+; CHECK-LABEL: f27:
  ; CHECK: lgrl [[REG:%r[0-5]]], g4
  ; CHECK: stg [[REG]], 0(%r2)
  ; CHECK: br %r14
@@ -349,8 +332,8 @@ define void @f29(i64 *%ptr) {
  }
  
  ; ...likewise STGRL.
-define void @f30(i64 *%ptr) {
-; CHECK-LABEL: f30:
+define void @f28(i64 *%ptr) {
+; CHECK-LABEL: f28:
  ; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
  ; CHECK: stgrl [[REG]], g4
  ; CHECK: br %r14
@@ -360,30 +343,20 @@ define void @f30(i64 *%ptr) {
  }
  
  ; Test that we can use MVC for global addresses for fp128.
-define void @f31(fp128 *%ptr) {
-; CHECK-LABEL: f31:
-; CHECK: larl [[REG:%r[0-5]]], g5
-; CHECK: mvc 0(16,%r2), 0([[REG]])
-; CHECK: br %r14
-  %val = load fp128 *@g5, align 16
-  store fp128 %val, fp128 *%ptr, align 16
-  ret void
-}
-
-; ...and again with the global on the store.
-define void @f32(fp128 *%ptr) {
-; CHECK-LABEL: f32:
-; CHECK: larl [[REG:%r[0-5]]], g5
-; CHECK: mvc 0(16,[[REG]]), 0(%r2)
+define void @f29(fp128 *%ptr) {
+; CHECK-LABEL: f29:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g5src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst
+; CHECK: mvc 0(16,[[DST]]), 0([[SRC]])
  ; CHECK: br %r14
-  %val = load fp128 *%ptr, align 16
-  store fp128 %val, fp128 *@g5, align 16
+  %val = load fp128 *@g5src, align 16
+  store fp128 %val, fp128 *@g5dst, align 16
    ret void
  }
  
  ; Test a case where offset disambiguation is enough.
-define void @f33(i64 *%ptr1) {
-; CHECK-LABEL: f33:
+define void @f30(i64 *%ptr1) {
+; CHECK-LABEL: f30:
  ; CHECK: mvc 8(8,%r2), 0(%r2)
  ; CHECK: br %r14
    %ptr2 = getelementptr i64 *%ptr1, i64 1
@@ -393,8 +366,8 @@ define void @f33(i64 *%ptr1) {
  }
  
  ; Test f21 in cases where TBAA tells us there is no alias.
-define void @f34(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f34:
+define void @f31(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f31:
  ; CHECK: mvc 0(8,%r3), 0(%r2)
  ; CHECK: br %r14
    %val = load i64 *%ptr1, align 2, !tbaa !1
@@ -403,8 +376,8 @@ define void @f34(i64 *%ptr1, i64 *%ptr2) {
  }
  
  ; Test f21 in cases where TBAA is present but doesn't help.
-define void @f35(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f35:
+define void @f32(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f32:
  ; CHECK-NOT: mvc
  ; CHECK: br %r14
    %val = load i64 *%ptr1, align 2, !tbaa !1
author	Richard Sandiford <rsandifo@linux.vnet.ibm.com>
	Fri, 27 Sep 2013 15:29:20 +0000 (15:29 +0000)
committer	Richard Sandiford <rsandifo@linux.vnet.ibm.com>
	Fri, 27 Sep 2013 15:29:20 +0000 (15:29 +0000)
llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp		patch | blob | history
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp		patch | blob | history
llvm/test/CodeGen/SystemZ/and-08.ll		patch | blob | history
llvm/test/CodeGen/SystemZ/fp-move-02.ll		patch | blob | history
llvm/test/CodeGen/SystemZ/memcpy-02.ll		patch | blob | history