SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ /// Replace a load that is directly fed by a store with the stored value,
+ /// adjusting for any truncation or extension the memory types require.
+ SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
+ bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
+ bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
+
/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
+// Returns the number of vector elements, or zero for a scalar; scalars
+// report size 0 to distinguish them from single-element vectors.
+static inline int numVectorEltsOrZero(EVT T) {
+  return T.isVector() ? T.getVectorNumElements() : 0;
+}
+
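+// Recover the value as it exists in memory after the possibly-truncating
+// store ST; e.g. (illustrative) a store of i32 %v through an i16 memory
+// type yields Val = (trunc i32 %v to i16).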
+bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
+ Val = ST->getValue();
+ EVT STType = Val.getValueType();
+ EVT STMemType = ST->getMemoryVT();
+ if (STType == STMemType)
+ return true;
+ if (isTypeLegal(STMemType))
+ return false; // fail.
+ if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
+ TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
+ Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
+ STType.isInteger() && STMemType.isInteger()) {
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
+ Val = DAG.getBitcast(STMemType, Val);
+ return true;
+ }
+ return false; // fail.
+}
+
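+// Re-apply the extension performed by the load LD (any-, sign-, or
+// zero-extend from the memory type to the result type) to the in-memory
+// value Val, so that Val matches what the original load produced.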
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
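+// If the chain directly links this load to a store that covers all of the
+// loaded bits, forward the stored value instead of reloading it. An
+// illustrative sketch of the intent in IR terms:
+//   store i32 %v, i32* %p
+//   %x = load i32, i32* %p   ; becomes a use of %v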
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (!ST || ST->isVolatile())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
+ int64_t Offset;
+
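+ // The store covers the load if the loaded bits [Offset, Offset + load
+ // size) fall entirely within the stored bits (conservatively also
+ // requiring the offset not to exceed the load's own width).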
+ bool STCoversLD =
+ BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset) && (Offset >= 0) &&
+ (Offset * 8 <= LDMemType.getSizeInBits()) &&
+ (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory acts as copy space (potentially masked): the load reads back the
+ // bits the store wrote, modulo truncation and extension.
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+ // Simple case: Direct non-truncating forwarding
+ if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ return CombineTo(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
+ // Mask to size of LDMemType
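+ // e.g. a truncating store of i32 %v as i16 followed by a zextload back
+ // to i32 forwards as (and %v, 0xffff).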
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
+ STMemType.getSizeInBits()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return CombineTo(LD, Val, Chain);
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+ // Model necessary truncations / extensions.
+ SDValue Val;
+ // Truncate the stored value to the size it occupies in memory.
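+ // The do..while (false) below lets 'continue' act as a structured jump to
+ // the failure cleanup that follows the loop.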
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ if (numVectorEltsOrZero(STMemType) == numVectorEltsOrZero(LDMemType) &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return CombineTo(LD, Val, Chain);
+ } while (false);
+
+ // On failure, clean up any dead nodes we may have created.
+ if (Val.getNode() && Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
// If this load is directly stored, replace the load value with the stored
// value.
- // TODO: Handle store large -> read small portion.
- // TODO: Handle TRUNCSTORE/LOADEXT
- if (OptLevel != CodeGenOpt::None &&
- ISD::isNormalLoad(N) && !LD->isVolatile()) {
- if (ISD::isNON_TRUNCStore(Chain.getNode())) {
- StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
- if (PrevST->getBasePtr() == Ptr &&
- PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, PrevST->getOperand(1), Chain);
- }
- }
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
}
; CHECK-LABEL: Str64Ldr32_0
-; CHECK: and x0, x1, #0xffffffff
+; CHECK: mov w0, w1
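+; Store-to-load forwarding replaces the masking 'and' in these tests with a
+; plain move: reading the w sub-register already yields the truncated value.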
define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
entry:
%0 = bitcast i64* %P to i32*
}
; CHECK-LABEL: Str64Ldr16_0
-; CHECK: and x0, x1, #0xffff
+; CHECK: mov w0, w1
define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
entry:
%0 = bitcast i64* %P to i16*
}
; CHECK-LABEL: Str64Ldr8_0
-; CHECK: and x0, x1, #0xff
+; CHECK: mov w0, w1
define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
entry:
%0 = bitcast i64* %P to i8*
}
; CHECK-LABEL: Str32Ldr16_0
-; CHECK: and w0, w1, #0xffff
+; CHECK: mov w0, w1
define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
entry:
%0 = bitcast i32* %P to i16*
}
; CHECK-LABEL: Str32Ldr8_0
-; CHECK: and w0, w1, #0xff
+; CHECK: mov w0, w1
define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
entry:
%0 = bitcast i32* %P to i8*
}
; CHECK-LABEL: Str16Ldr16
-; CHECK: and w0, w1, #0xffff
+; CHECK: mov w0, w1
define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
entry:
%0 = bitcast i16* %P to i16*
}
; CHECK-LABEL: Str16Ldr8_0
-; CHECK: and w0, w1, #0xff
+; CHECK: mov w0, w1
define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
entry:
%0 = bitcast i16* %P to i8*
}
; CHECK-LABEL: Unscaled_Str64Ldr32_0
-; CHECK: and x0, x1, #0xffffffff
+; CHECK: mov w0, w1
define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
entry:
%0 = bitcast i64* %P to i32*
}
; CHECK-LABEL: Unscaled_Str64Ldr16_0
-; CHECK: and x0, x1, #0xffff
+; CHECK: mov w0, w1
define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
entry:
%0 = bitcast i64* %P to i16*
}
; CHECK-LABEL: Unscaled_Str64Ldr8_0
-; CHECK: and x0, x1, #0xff
+; CHECK: mov w0, w1
define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
entry:
%0 = bitcast i64* %P to i8*
}
; CHECK-LABEL: Unscaled_Str32Ldr16_0
-; CHECK: and w0, w1, #0xffff
+; CHECK: mov w0, w1
define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
entry:
%0 = bitcast i32* %P to i16*
}
; CHECK-LABEL: Unscaled_Str32Ldr8_0
-; CHECK: and w0, w1, #0xff
+; CHECK: mov w0, w1
define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
entry:
%0 = bitcast i32* %P to i8*
}
; CHECK-LABEL: Unscaled_Str16Ldr16
-; CHECK: and w0, w1, #0xffff
+; CHECK: mov w0, w1
define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
entry:
%0 = bitcast i16* %P to i16*
}
; CHECK-LABEL: Unscaled_Str16Ldr8_0
-; CHECK: and w0, w1, #0xff
+; CHECK: mov w0, w1
define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
entry:
%0 = bitcast i16* %P to i8*
store i8 %inc.4, i8* %locvar
; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1
; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
-; CHECK: and w0, w[[STRVAL]], #0xff
+; CHECK: and x0, x[[STRVAL]], #0xff
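+; With the load forwarded, the increment and mask are performed on the
+; 64-bit registers feeding the zext'ed return value.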
%ret.1 = load i8, i8* %locvar
%ret.2 = zext i8 %ret.1 to i64
define zeroext i16 @my_setbit(i16 zeroext %crc) nounwind {
entry:
; CHECK-LABEL: my_setbit
-; CHECK: memh(r{{[0-9]+}}+#{{[0-9]+}}) = setbit(#15)
+; CHECK: r{{[0-9]+}} = setbit(r{{[0-9]+}},#15)
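+; With the store forwarded to the load, the setbit is done in a register
+; rather than with a memory-form instruction.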
%crc.addr = alloca i16, align 2
store i16 %crc, i16* %crc.addr, align 2
%0 = load i16, i16* %crc.addr, align 2
+++ /dev/null
-; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2 \
-; RUN: -pipeliner-ignore-recmii -disable-hexagon-nv-schedule \
-; RUN: -hexagon-initial-cfg-cleanup=0 -stats -o /dev/null \
-; RUN: -enable-aa-sched-mi < %s 2>&1 | FileCheck %s --check-prefix=STATS
-; REQUIRES: asserts
-;
-; Test that we generate the correct phis in the last epilog block when
-; allowing multiple stages.
-;
-; STATS: 1 pipeliner - Number of loops software pipelined
-
-; Function Attrs: nounwind
-define void @f0() #0 {
-b0:
- br i1 undef, label %b6, label %b1
-
-b1: ; preds = %b0
- br i1 undef, label %b6, label %b2
-
-b2: ; preds = %b1
- br label %b4
-
-b3: ; preds = %b4, %b3
- %v0 = add nsw i32 0, 57344
- %v1 = trunc i32 %v0 to i16
- store i16 %v1, i16* null, align 2, !tbaa !0
- %v2 = getelementptr inbounds i8, i8* null, i32 undef
- %v3 = load i8, i8* %v2, align 1, !tbaa !4
- %v4 = zext i8 %v3 to i32
- %v5 = shl nuw nsw i32 %v4, 6
- %v6 = add nsw i32 %v5, 57344
- %v7 = trunc i32 %v6 to i16
- store i16 %v7, i16* undef, align 2, !tbaa !0
- br i1 undef, label %b5, label %b3
-
-b4: ; preds = %b5, %b2
- %v8 = phi i32 [ 0, %b2 ], [ %v9, %b5 ]
- br label %b3
-
-b5: ; preds = %b3
- %v9 = add i32 %v8, 1
- %v10 = icmp eq i32 %v9, undef
- br i1 %v10, label %b6, label %b4
-
-b6: ; preds = %b5, %b1, %b0
- ret void
-}
-
-attributes #0 = { nounwind "target-cpu"="hexagonv55" }
-
-!0 = !{!1, !1, i64 0}
-!1 = !{!"short", !2}
-!2 = !{!"omnipotent char", !3}
-!3 = !{!"Simple C/C++ TBAA"}
-!4 = !{!2, !2, i64 0}
+++ /dev/null
-; RUN: llc -march=hexagon -enable-pipeliner < %s | FileCheck %s
-
-; Test that a store and load, that alias, are not put in the same packet. The
-; pipeliner altered the size of the memrefs for these instructions, which
-; resulted in no order dependence between the instructions in the DAG. No order
-; dependence was added since the size was set to UINT_MAX, but there is a
-; computation using the size that overflowed.
-
-; CHECK: endloop0
-; CHECK: memh([[REG:r([0-9]+)]]+#0) =
-; CHECK: = memh([[REG]]++#2)
-
-; Function Attrs: nounwind
-define signext i16 @f0(i16* nocapture readonly %a0, i16* nocapture readonly %a1) local_unnamed_addr #0 {
-b0:
- %v0 = alloca [40 x i16], align 8
- %v1 = bitcast [40 x i16]* %v0 to i8*
- call void @llvm.lifetime.start.p0i8(i64 80, i8* nonnull %v1) #2
- %v2 = getelementptr inbounds [40 x i16], [40 x i16]* %v0, i32 0, i32 0
- br label %b1
-
-b1: ; preds = %b1, %b0
- %v3 = phi i16* [ %a1, %b0 ], [ %v24, %b1 ]
- %v4 = phi i16* [ %v2, %b0 ], [ %v25, %b1 ]
- %v5 = phi i32 [ 0, %b0 ], [ %v14, %b1 ]
- %v6 = phi i32 [ 1, %b0 ], [ %v22, %b1 ]
- %v7 = phi i32 [ 0, %b0 ], [ %v23, %b1 ]
- %v8 = load i16, i16* %v3, align 2
- %v9 = sext i16 %v8 to i32
- %v10 = tail call i32 @llvm.hexagon.A2.aslh(i32 %v9)
- %v11 = tail call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %v10, i32 1)
- %v12 = tail call i32 @llvm.hexagon.A2.asrh(i32 %v11)
- %v13 = trunc i32 %v12 to i16
- store i16 %v13, i16* %v4, align 2
- %v14 = add nuw nsw i32 %v5, 1
- %v15 = icmp eq i32 %v14, 40
- %v16 = getelementptr inbounds i16, i16* %a0, i32 %v7
- %v17 = load i16, i16* %v16, align 2
- %v18 = sext i16 %v17 to i32
- %v19 = getelementptr inbounds [40 x i16], [40 x i16]* %v0, i32 0, i32 %v7
- %v20 = load i16, i16* %v19, align 2
- %v21 = sext i16 %v20 to i32
- %v22 = tail call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32 %v6, i32 %v18, i32 %v21)
- %v23 = add nuw nsw i32 %v7, 1
- %v24 = getelementptr i16, i16* %v3, i32 1
- %v25 = getelementptr i16, i16* %v4, i32 1
- br i1 %v15, label %b2, label %b1
-
-b2: ; preds = %b1
- %v26 = tail call signext i16 @f1(i32 %v22) #0
- %v27 = sext i16 %v26 to i32
- %v28 = tail call i32 @llvm.hexagon.S2.asl.r.r.sat(i32 %v22, i32 %v27)
- %v29 = tail call i32 @llvm.hexagon.A2.asrh(i32 %v28)
- %v30 = shl i32 %v29, 16
- %v31 = ashr exact i32 %v30, 16
- %v32 = icmp slt i32 %v30, 65536
- br label %b3
-
-b3: ; preds = %b2
- call void @llvm.lifetime.end.p0i8(i64 80, i8* nonnull %v1) #2
- ret i16 0
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32) #2
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.hexagon.A2.aslh(i32) #2
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.hexagon.A2.asrh(i32) #2
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32, i32, i32) #2
-
-; Function Attrs: nounwind
-declare signext i16 @f1(i32) local_unnamed_addr #0
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.hexagon.S2.asl.r.r.sat(i32, i32) #2
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly nounwind }
-attributes #2 = { nounwind readnone }
; MIPS64R5-NEXT: sd $4, 24($sp)
; MIPS64R5-NEXT: ldi.b $w0, 0
; MIPS64R5-NEXT: lw $1, 20($sp)
-; MIPS64R5-NEXT: lw $2, 16($sp)
; MIPS64R5-NEXT: move.v $w1, $w0
-; MIPS64R5-NEXT: insert.d $w1[0], $2
+; MIPS64R5-NEXT: insert.d $w1[0], $5
; MIPS64R5-NEXT: insert.d $w1[1], $1
-; MIPS64R5-NEXT: lw $1, 24($sp)
-; MIPS64R5-NEXT: insert.d $w0[0], $1
+; MIPS64R5-NEXT: insert.d $w0[0], $4
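+; The element inserts use the argument registers directly instead of
+; reloading the values spilled to the stack.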
; MIPS64R5-NEXT: lw $1, 28($sp)
; MIPS64R5-NEXT: insert.d $w0[1], $1
; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32
; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
; MIPS32R5EB-NEXT: .cfi_offset 31, -4
-; MIPS32R5EB-NEXT: addiu $1, $zero, 1543
-; MIPS32R5EB-NEXT: sh $1, 20($sp)
-; MIPS32R5EB-NEXT: addiu $1, $zero, 3080
-; MIPS32R5EB-NEXT: sh $1, 24($sp)
-; MIPS32R5EB-NEXT: lhu $4, 20($sp)
-; MIPS32R5EB-NEXT: lhu $5, 24($sp)
+; MIPS32R5EB-NEXT: addiu $4, $zero, 1543
+; MIPS32R5EB-NEXT: addiu $5, $zero, 3080
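+; The byte arguments are forwarded from their stores, so they are
+; materialized directly into the argument registers (likewise in the
+; little-endian variant below).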
; MIPS32R5EB-NEXT: jal i8_2
; MIPS32R5EB-NEXT: nop
; MIPS32R5EB-NEXT: sw $2, 16($sp)
; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32
; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill
; MIPS32R5EL-NEXT: .cfi_offset 31, -4
-; MIPS32R5EL-NEXT: addiu $1, $zero, 1798
-; MIPS32R5EL-NEXT: sh $1, 20($sp)
-; MIPS32R5EL-NEXT: addiu $1, $zero, 2060
-; MIPS32R5EL-NEXT: sh $1, 24($sp)
-; MIPS32R5EL-NEXT: lhu $4, 20($sp)
-; MIPS32R5EL-NEXT: lhu $5, 24($sp)
+; MIPS32R5EL-NEXT: addiu $4, $zero, 1798
+; MIPS32R5EL-NEXT: addiu $5, $zero, 2060
; MIPS32R5EL-NEXT: jal i8_2
; MIPS32R5EL-NEXT: nop
; MIPS32R5EL-NEXT: sw $2, 16($sp)
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: daddiu $sp, $sp, -16
; MIPS64R2-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R2-NEXT: sw $4, 4($sp)
-; MIPS64R2-NEXT: lwu $2, 4($sp)
+; MIPS64R2-NEXT: dext $2, $4, 0, 32
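+; Forwarding the stored word turns the stack round trip (sw + lwu) into an
+; in-register zero-extension (dext).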
; MIPS64R2-NEXT: sltiu $1, $2, 7
; MIPS64R2-NEXT: beqz $1, .LBB0_3
-; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: sw $4, 4($sp)
; MIPS64R2-NEXT: .LBB0_1: # %entry
; MIPS64R2-NEXT: dsll $1, $2, 3
; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0)
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R6-NEXT: sw $4, 4($sp)
-; MIPS64R6-NEXT: lwu $2, 4($sp)
+; MIPS64R6-NEXT: dext $2, $4, 0, 32
; MIPS64R6-NEXT: sltiu $1, $2, 7
-; MIPS64R6-NEXT: beqzc $1, .LBB0_3
+; MIPS64R6-NEXT: beqz $1, .LBB0_3
+; MIPS64R6-NEXT: sw $4, 4($sp)
; MIPS64R6-NEXT: .LBB0_1: # %entry
; MIPS64R6-NEXT: dsll $1, $2, 3
; MIPS64R6-NEXT: lui $2, %highest(.LJTI0_0)
; PIC-MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi)))
; PIC-MIPS64R2-NEXT: daddu $1, $1, $25
; PIC-MIPS64R2-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
-; PIC-MIPS64R2-NEXT: sw $4, 4($sp)
-; PIC-MIPS64R2-NEXT: lwu $3, 4($sp)
+; PIC-MIPS64R2-NEXT: dext $3, $4, 0, 32
; PIC-MIPS64R2-NEXT: sltiu $1, $3, 7
; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3
-; PIC-MIPS64R2-NEXT: nop
+; PIC-MIPS64R2-NEXT: sw $4, 4($sp)
; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry
; PIC-MIPS64R2-NEXT: dsll $1, $3, 3
; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2)
; PIC-MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi)))
; PIC-MIPS64R6-NEXT: daddu $1, $1, $25
; PIC-MIPS64R6-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
-; PIC-MIPS64R6-NEXT: sw $4, 4($sp)
-; PIC-MIPS64R6-NEXT: lwu $3, 4($sp)
+; PIC-MIPS64R6-NEXT: dext $3, $4, 0, 32
; PIC-MIPS64R6-NEXT: sltiu $1, $3, 7
-; PIC-MIPS64R6-NEXT: beqzc $1, .LBB0_3
+; PIC-MIPS64R6-NEXT: beqz $1, .LBB0_3
+; PIC-MIPS64R6-NEXT: sw $4, 4($sp)
; PIC-MIPS64R6-NEXT: .LBB0_1: # %entry
; PIC-MIPS64R6-NEXT: dsll $1, $3, 3
; PIC-MIPS64R6-NEXT: ld $3, %got_page(.LJTI0_0)($2)
; CHECK-NEXT: lw $1, 64($sp)
; CHECK-NEXT: lw $2, 68($sp)
; CHECK-NEXT: lh $3, 58($sp)
-; CHECK-NEXT: lb $5, 56($sp)
+; CHECK-NEXT: sll $5, $6, 24
+; CHECK-NEXT: sra $5, $5, 24
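+; The byte argument is sign-extended in registers (sll/sra) instead of
+; being reloaded with lb, here and again below.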
; CHECK-NEXT: swc1 $f12, 36($sp)
; CHECK-NEXT: sw $5, 32($sp)
; CHECK-NEXT: sw $3, 28($sp)
; CHECK-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill
; CHECK-NEXT: addu $gp, $2, $25
; CHECK-NEXT: move $4, $7
-; CHECK-NEXT: sw $5, 52($sp)
; CHECK-NEXT: sw $6, 56($sp)
+; CHECK-NEXT: sw $5, 52($sp)
; CHECK-NEXT: sw $7, 60($sp)
; CHECK-NEXT: lw $1, 80($sp)
-; CHECK-NEXT: lb $2, 52($sp)
+; CHECK-NEXT: sll $2, $5, 24
+; CHECK-NEXT: sra $2, $2, 24
; CHECK-NEXT: addiu $3, $zero, 4
; CHECK-NEXT: lui $5, 16576
; CHECK-NEXT: sw $5, 36($sp)
; CHECK-LABEL: va1:
; CHECK: addiu $sp, $sp, -16
-; CHECK: sw $5, 20($sp)
; CHECK: sw $7, 28($sp)
; CHECK: sw $6, 24($sp)
-; CHECK: lw $2, 20($sp)
+; CHECK: sw $5, 20($sp)
+; CHECK: move $2, $5
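+; The first vararg is read with a move from $5 rather than reloaded from
+; the stack (likewise in va3 and va5 below).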
}
; check whether the variable double argument will be accessed from the 8-byte
; CHECK-LABEL: va3:
; CHECK: addiu $sp, $sp, -16
-; CHECK: sw $6, 24($sp)
; CHECK: sw $7, 28($sp)
-; CHECK: lw $2, 24($sp)
+; CHECK: sw $6, 24($sp)
+; CHECK: move $2, $6
}
; double
; CHECK-LABEL: va5:
; CHECK: addiu $sp, $sp, -24
; CHECK: sw $7, 36($sp)
-; CHECK: lw $2, 36($sp)
+; CHECK: move $2, $7
}
; double
ret i32 %bf.cast
; CHECK-LABEL: @foo
-; FIXME: We don't need to do these stores/loads at all.
+; FIXME: We don't need to do these stores at all.
; CHECK-DAG: std 3, -24(1)
; CHECK-DAG: stb 4, -16(1)
-; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
+; CHECK-DAG: sldi [[REG3:[0-9]+]], 4, 32
; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
-; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
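+; The reload (lbz) is forwarded from the stb, so the shift now uses
+; register 4 directly.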
; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
; CHECK: rldicl 3, [[REG4]], 33, 57
; CHECK: blr
; CHECK-NEXT: rosbg %r0, %r1, 62, 62, 1
; CHECK-NEXT: vlgvb %r1, %v24, 15
; CHECK-NEXT: rosbg %r0, %r1, 63, 63, 0
-; CHECK-NEXT: sth %r0, 160(%r15)
-; CHECK-NEXT: lh %r2, 160(%r15)
+; CHECK-NEXT: llhr %r2, %r0
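+; The stored halfword is forwarded, so the reload becomes an in-register
+; extension (llhr).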
; CHECK-NEXT: aghi %r15, 168
; CHECK-NEXT: br %r14
{
;
; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]]
; CHECK-NEXT: movb %dl, ([[E]])
-; CHECK-NEXT: movsbl ([[E]]), [[CONV:%[a-z]+]]
+; CHECK-NEXT: movzbl %dl, [[CONV:%[a-z]+]]
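+; The reload is forwarded from the preceding store, becoming an in-register
+; extension of %dl.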
; CHECK-NEXT: movl $6, [[CONV:%[a-z]+]]
; EFLAGS is used in the next instruction.
; If that instruction disappears, we are not exercising the bug
define void @pr32108() {
; CHECK-LABEL: pr32108:
; CHECK: # %bb.0: # %BB
-; CHECK-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %CF244
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq __gnu_f2h_ieee
-; SSE-NEXT: movw %ax, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
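+; The value no longer takes a round trip through the stack before the final
+; store, here and in the cases below.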
; SSE-NEXT: movw %ax, (%rax)
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: movw %ax, (%rax)
; AVX512-NEXT: retq
%a = bitcast half %x to <4 x i4>
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq __gnu_f2h_ieee
-; SSE-NEXT: movw %ax, (%rsp)
-; SSE-NEXT: movzwl (%rsp), %eax
; SSE-NEXT: movw %ax, (%rax)
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; CHECK: movq %rcx, %rax
; CHECK-DAG: movq %r9, 40(%rsp)
; CHECK-DAG: movq %r8, 32(%rsp)
-; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]]
-; CHECK: movl %[[tmp]], (%rax)
+; CHECK-DAG: leaq 36(%rsp), %[[sret:[^ ]*]]
+; CHECK-DAG: movl %r8d, (%rax)
+; CHECK-DAG: movq %[[sret]], (%rsp)
; CHECK: popq
; CHECK: retq