return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- // Handle X+C
- if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
- cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
- return true;
-
+ // Handle X + C.
+ if (isBaseWithConstantOffset(Loc)) {
+ int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc) {
+ // If the base location is a simple address with no offset itself, then
+ // the second load's first add operand should be the base address.
+ if (LocOffset == Dist * (int)Bytes)
+ return true;
+ } else if (isBaseWithConstantOffset(BaseLoc)) {
+ // The base location itself has an offset, so subtract that value from the
+ // second load's offset before comparing to distance * size.
+ int64_t BOffset =
+ cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+ if ((LocOffset - BOffset) == Dist * (int)Bytes)
+ return true;
+ }
+ }
+ }
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
; RUN: llc < %s -mcpu=corei7-avx -mtriple=x86_64-linux | FileCheck %s
-;CHECK-LABEL: cftx020:
-;CHECK: vmovsd (%rdi), %xmm{{.*}}
-;CHECK: vmovsd 16(%rdi), %xmm{{.*}}
-;CHECK: vmovsd 24(%rdi), %xmm{{.*}}
-;CHECK: vmovhpd 8(%rdi), %xmm{{.*}}
-;CHECK: vmovupd %xmm{{.*}}, (%rdi)
-;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
-;CHECK: ret
+; CHECK-LABEL: cftx020:
+; CHECK: vmovsd (%rdi), %xmm{{.*}}
+; CHECK-NEXT: vmovsd 16(%rdi), %xmm{{.*}}
+; CHECK-NEXT: vmovhpd 24(%rdi), %xmm{{.*}}
+; CHECK-NEXT: vmovhpd 8(%rdi), %xmm{{.*}}
+; CHECK: vmovupd %xmm{{.*}}, (%rdi)
+; CHECK-NEXT: vmovupd %xmm{{.*}}, 16(%rdi)
+; CHECK: ret
; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
define void @cftx020(double* nocapture %a) {
; FAST32-NEXT: retq
; SLOW32: vmovups
-; SLOW32: vinsertf128
+; SLOW32-NEXT: vinsertf128
; SLOW32-NEXT: retq
}
; FAST32-NEXT: retq
; SLOW32: vmovups
-; SLOW32: vinsertf128
+; SLOW32-NEXT: vinsertf128
+; SLOW32-NEXT: retq
+}
+
+; PR21771 ( http://llvm.org/bugs/show_bug.cgi?id=21771 )
+; Recognize and combine consecutive loads even when the
+; first of the combined loads is offset from the base address.
+define <4 x double> @merge_4_doubles_offset(double* %ptr) {
+ %arrayidx4 = getelementptr inbounds double* %ptr, i64 4
+ %arrayidx5 = getelementptr inbounds double* %ptr, i64 5
+ %arrayidx6 = getelementptr inbounds double* %ptr, i64 6
+ %arrayidx7 = getelementptr inbounds double* %ptr, i64 7
+ %e = load double* %arrayidx4, align 8
+ %f = load double* %arrayidx5, align 8
+ %g = load double* %arrayidx6, align 8
+ %h = load double* %arrayidx7, align 8
+ %vecinit4 = insertelement <4 x double> undef, double %e, i32 0
+ %vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1
+ %vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2
+ %vecinit7 = insertelement <4 x double> %vecinit6, double %h, i32 3
+ ret <4 x double> %vecinit7
+
+; ALL-LABEL: merge_4_doubles_offset
+; FAST32: vmovups
+; FAST32-NEXT: retq
+
+; SLOW32: vmovups
+; SLOW32-NEXT: vinsertf128
; SLOW32-NEXT: retq
}