SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ // Return true if Load and Store are loads and stores of the same size
+ // and are guaranteed not to overlap. Such operations can be implemented
+ // using block (SS-format) instructions.
+ //
+ // Partial overlap would lead to incorrect code, since the block operations
+ // are logically bytewise, even though they have a fast path for the
+ // non-overlapping case. We also need to avoid full overlap (i.e. two
+ // addresses that might be equal at run time) because although that case
+ // would be handled correctly, it might be implemented by millicode.
+ bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;
+
// N is a (store (load Y), X) pattern. Return true if it can use an MVC
// from Y to X.
bool storeLoadCanUseMVC(SDNode *N) const;
return Or.getNode();
}
-// Return true if Load and Store:
-// - are loads and stores of the same size;
-// - do not partially overlap; and
-// - can be decomposed into what are logically individual character accesses
-// without changing the semantics.
-static bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load,
- AliasAnalysis *AA) {
+bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
+ LoadSDNode *Load) const {
// Check that the two memory operands have the same size.
if (Load->getMemoryVT() != Store->getMemoryVT())
return false;
if (Load->isInvariant())
return true;
- // If both operands are aligned, they must be equal or not overlap.
- uint64_t Size = Load->getMemoryVT().getStoreSize();
- if (Load->getAlignment() >= Size && Store->getAlignment() >= Size)
- return true;
-
// Otherwise we need to check whether there's an alias.
const Value *V1 = Load->getSrcValue();
const Value *V2 = Store->getSrcValue();
if (!V1 || !V2)
return false;
+ // Reject equality.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
int64_t End1 = Load->getSrcValueOffset() + Size;
int64_t End2 = Store->getSrcValueOffset() + Size;
+ if (V1 == V2 && End1 == End2)
+ return false;
+
return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
}
return false;
}
- return canUseBlockOperation(Store, Load, AA);
+ return canUseBlockOperation(Store, Load);
}
bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
StoreSDNode *StoreA = cast<StoreSDNode>(N);
LoadSDNode *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
LoadSDNode *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
- if (LoadA->isVolatile() ||
- LoadA->getMemoryVT() != StoreA->getMemoryVT() ||
- LoadA->getBasePtr() != StoreA->getBasePtr())
- return false;
- return canUseBlockOperation(StoreA, LoadB, AA);
+ return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
}
SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
//
// Although MVC is in practice a fast choice in these cases, it is still
// logically a bytewise copy. This means that we cannot use it if the
- // load or store is volatile. It also means that the transformation is
- // not valid in cases where the two memories partially overlap; however,
- // that is not a problem here, because we know that one of the memories
- // is a full frame index.
+ // load or store is volatile. We also wouldn't be able to use MVC if
+ // the two memories partially overlap, but that case cannot occur here,
+ // because we know that one of the memories is a full frame index.
+ //
+ // For performance reasons, we also want to avoid using MVC if the addresses
+ // might be equal. We don't worry about that case here, because spill slot
+ // coloring happens later, and because we have special code to remove
+ // MVCs that turn out to be redundant.
if (OpNum == 0 && MI->hasOneMemOperand()) {
MachineMemOperand *MMO = *MI->memoperands_begin();
if (MMO->getSize() == Size && !MMO->isVolatile()) {
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-@g1 = global i8 1
-@g2 = global i16 2
+@g1src = global i8 1
+@g1dst = global i8 1
+@g2src = global i16 2
+@g2dst = global i16 2
; Test the simple i8 case.
define void @f1(i8 *%ptr1) {
ret void
}
-; Test that NC is used for aligned loads and stores, even if there is
-; no way of telling whether they alias.
+; Test that NC is not used for aligned loads and stores if there is
+; no way of telling whether they alias. We don't want to use NC in
+; cases where the addresses could be equal.
define void @f17(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f17:
-; CHECK: nc 0(8,%r3), 0(%r2)
+; CHECK-NOT: nc
; CHECK: br %r14
%val = load i64 *%ptr1
%old = load i64 *%ptr2
; Test that we can use NC for global addresses for i8.
define void @f22(i8 *%ptr) {
; CHECK-LABEL: f22:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: nc 0(1,%r2), 0([[REG]])
-; CHECK: br %r14
- %val = load i8 *@g1
- %old = load i8 *%ptr
- %and = and i8 %val, %old
- store i8 %and, i8 *%ptr
- ret void
-}
-
-; ...and again with the global on the store.
-define void @f23(i8 *%ptr) {
-; CHECK-LABEL: f23:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: nc 0(1,[[REG]]), 0(%r2)
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
+; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load i8 *%ptr
- %old = load i8 *@g1
+ %val = load i8 *@g1src
+ %old = load i8 *@g1dst
%and = and i8 %val, %old
- store i8 %and, i8 *@g1
+ store i8 %and, i8 *@g1dst
ret void
}
; Test that we use NC even where LHRL and STHRL are available.
-define void @f24(i16 *%ptr) {
-; CHECK-LABEL: f24:
-; CHECK: larl [[REG:%r[0-5]]], g2
-; CHECK: nc 0(2,%r2), 0([[REG]])
-; CHECK: br %r14
- %val = load i16 *@g2
- %old = load i16 *%ptr
- %and = and i16 %val, %old
- store i16 %and, i16 *%ptr
- ret void
-}
-
-; ...likewise on the other side.
-define void @f25(i16 *%ptr) {
-; CHECK-LABEL: f25:
-; CHECK: larl [[REG:%r[0-5]]], g2
-; CHECK: nc 0(2,[[REG]]), 0(%r2)
+define void @f23(i16 *%ptr) {
+; CHECK-LABEL: f23:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
+; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load i16 *%ptr
- %old = load i16 *@g2
+ %val = load i16 *@g2src
+ %old = load i16 *@g2dst
%and = and i16 %val, %old
- store i16 %and, i16 *@g2
+ store i16 %and, i16 *@g2dst
ret void
}
; Test a case where offset disambiguation is enough.
-define void @f26(i64 *%ptr1) {
-; CHECK-LABEL: f26:
+define void @f24(i64 *%ptr1) {
+; CHECK-LABEL: f24:
; CHECK: nc 8(8,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i64 *%ptr1, i64 1
}
; Test a case where TBAA tells us there is no alias.
-define void @f27(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f27:
+define void @f25(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f25:
; CHECK: nc 0(8,%r3), 0(%r2)
; CHECK: br %r14
%val = load i64 *%ptr1, align 2, !tbaa !1
}
; Test a case where TBAA information is present but doesn't help.
-define void @f28(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f28:
+define void @f26(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f26:
; CHECK-NOT: nc
; CHECK: br %r14
%val = load i64 *%ptr1, align 2, !tbaa !1
; Test 128-bit moves from GPRs to FPRs. i128 isn't a legitimate type,
; so this goes through memory.
-; FIXME: it would be better to use one MVC here.
define void @f6(fp128 *%a, i128 *%b) {
; CHECK-LABEL: f6:
; CHECK: lg
-; CHECK: mvc
+; CHECK: lg
+; CHECK: stg
; CHECK: stg
; CHECK: br %r14
%val = load i128 *%b
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-@g1 = global i8 1
-@g2 = global i16 2
+@g1src = global i8 1
+@g1dst = global i8 1
+@g2src = global i16 2
+@g2dst = global i16 2
@g3 = global i32 3
@g4 = global i64 4
-@g5 = external global fp128, align 16
+@g5src = external global fp128, align 16
+@g5dst = external global fp128, align 16
; Test the simple i8 case.
define void @f1(i8 *%ptr1) {
ret void
}
-; Test that MVC is used for aligned loads and stores, even if there is
-; no way of telling whether they alias.
+; Test that MVC is not used for aligned loads and stores if there is
+; no way of telling whether they alias. We don't want to use MVC in
+; cases where the addresses could be equal.
define void @f20(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f20:
-; CHECK: mvc 0(8,%r3), 0(%r2)
+; CHECK-NOT: mvc
; CHECK: br %r14
%val = load i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
-; ...but if the loads aren't aligned, we can't be sure.
+; ...and again for unaligned loads and stores.
define void @f21(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f21:
; CHECK-NOT: mvc
; Test that we can use MVC for global addresses for i8.
define void @f23(i8 *%ptr) {
; CHECK-LABEL: f23:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: mvc 0(1,%r2), 0([[REG]])
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
+; CHECK: mvc 0(1,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load i8 *@g1
- store i8 %val, i8 *%ptr
+ %val = load i8 *@g1src
+ store i8 %val, i8 *@g1dst
ret void
}
-; ...and again with the global on the store.
-define void @f24(i8 *%ptr) {
+; Test that we use LHRL and STHRL for i16.
+define void @f24(i16 *%ptr) {
; CHECK-LABEL: f24:
-; CHECK: larl [[REG:%r[0-5]]], g1
-; CHECK: mvc 0(1,[[REG]]), 0(%r2)
-; CHECK: br %r14
- %val = load i8 *%ptr
- store i8 %val, i8 *@g1
- ret void
-}
-
-; Test that we use LHRL for i16.
-define void @f25(i16 *%ptr) {
-; CHECK-LABEL: f25:
-; CHECK: lhrl [[REG:%r[0-5]]], g2
-; CHECK: sth [[REG]], 0(%r2)
+; CHECK: lhrl [[REG:%r[0-5]]], g2src
+; CHECK: sthrl [[REG]], g2dst
; CHECK: br %r14
- %val = load i16 *@g2
- store i16 %val, i16 *%ptr
- ret void
-}
-
-; ...likewise STHRL.
-define void @f26(i16 *%ptr) {
-; CHECK-LABEL: f26:
-; CHECK: lh [[REG:%r[0-5]]], 0(%r2)
-; CHECK: sthrl [[REG]], g2
-; CHECK: br %r14
- %val = load i16 *%ptr
- store i16 %val, i16 *@g2
+ %val = load i16 *@g2src
+ store i16 %val, i16 *@g2dst
ret void
}
; Test that we use LRL for i32.
-define void @f27(i32 *%ptr) {
-; CHECK-LABEL: f27:
+define void @f25(i32 *%ptr) {
+; CHECK-LABEL: f25:
; CHECK: lrl [[REG:%r[0-5]]], g3
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
}
; ...likewise STRL.
-define void @f28(i32 *%ptr) {
-; CHECK-LABEL: f28:
+define void @f26(i32 *%ptr) {
+; CHECK-LABEL: f26:
; CHECK: l [[REG:%r[0-5]]], 0(%r2)
; CHECK: strl [[REG]], g3
; CHECK: br %r14
}
; Test that we use LGRL for i64.
-define void @f29(i64 *%ptr) {
-; CHECK-LABEL: f29:
+define void @f27(i64 *%ptr) {
+; CHECK-LABEL: f27:
; CHECK: lgrl [[REG:%r[0-5]]], g4
; CHECK: stg [[REG]], 0(%r2)
; CHECK: br %r14
}
; ...likewise STGRL.
-define void @f30(i64 *%ptr) {
-; CHECK-LABEL: f30:
+define void @f28(i64 *%ptr) {
+; CHECK-LABEL: f28:
; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
; CHECK: stgrl [[REG]], g4
; CHECK: br %r14
}
; Test that we can use MVC for global addresses for fp128.
-define void @f31(fp128 *%ptr) {
-; CHECK-LABEL: f31:
-; CHECK: larl [[REG:%r[0-5]]], g5
-; CHECK: mvc 0(16,%r2), 0([[REG]])
-; CHECK: br %r14
- %val = load fp128 *@g5, align 16
- store fp128 %val, fp128 *%ptr, align 16
- ret void
-}
-
-; ...and again with the global on the store.
-define void @f32(fp128 *%ptr) {
-; CHECK-LABEL: f32:
-; CHECK: larl [[REG:%r[0-5]]], g5
-; CHECK: mvc 0(16,[[REG]]), 0(%r2)
+define void @f29(fp128 *%ptr) {
+; CHECK-LABEL: f29:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g5src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst
+; CHECK: mvc 0(16,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load fp128 *%ptr, align 16
- store fp128 %val, fp128 *@g5, align 16
+ %val = load fp128 *@g5src, align 16
+ store fp128 %val, fp128 *@g5dst, align 16
ret void
}
; Test a case where offset disambiguation is enough.
-define void @f33(i64 *%ptr1) {
-; CHECK-LABEL: f33:
+define void @f30(i64 *%ptr1) {
+; CHECK-LABEL: f30:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
%ptr2 = getelementptr i64 *%ptr1, i64 1
}
; Test f21 in cases where TBAA tells us there is no alias.
-define void @f34(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f34:
+define void @f31(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f31:
; CHECK: mvc 0(8,%r3), 0(%r2)
; CHECK: br %r14
%val = load i64 *%ptr1, align 2, !tbaa !1
}
; Test f21 in cases where TBAA is present but doesn't help.
-define void @f35(i64 *%ptr1, i64 *%ptr2) {
-; CHECK-LABEL: f35:
+define void @f32(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f32:
; CHECK-NOT: mvc
; CHECK: br %r14
%val = load i64 *%ptr1, align 2, !tbaa !1