const TargetRegisterInfo *TRI) const {
assert(LdSt->mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
- if (LdSt->getNumExplicitOperands() != 3)
- return false;
- if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+ if (LdSt->getNumExplicitOperands() == 3) {
+ // Non-paired instruction (e.g., ldr x1, [x0, #8]).
+ if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+ return false;
+ } else if (LdSt->getNumExplicitOperands() == 4) {
+ // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
+ if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isReg() ||
+     !LdSt->getOperand(3).isImm())
+ return false;
+ } else
return false;
// Offset is calculated as the immediate operand multiplied by the scaling factor.
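+ // For example, an LDPXi with immediate operand 2 has a scaling factor of 8,
+ // so the resulting byte offset is 2 * 8 = 16.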
Width = 1;
Scale = 1;
break;
+ case AArch64::LDPQi:
+ case AArch64::LDNPQi:
+ case AArch64::STPQi:
+ case AArch64::STNPQi:
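+ // Paired Q-register accesses: each register is 16 bytes, so the immediate
+ // is scaled by 16 and the pair covers 32 bytes.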
+ Scale = 16;
+ Width = 32;
+ break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = Width = 16;
break;
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNPDi:
+ case AArch64::STPXi:
+ case AArch64::STPDi:
+ case AArch64::STNPXi:
+ case AArch64::STNPDi:
+ Scale = 8;
+ Width = 16;
+ break;
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
Scale = Width = 8;
break;
+ case AArch64::LDPWi:
+ case AArch64::LDPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPSi:
+ case AArch64::STPWi:
+ case AArch64::STPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPSi:
+ Scale = 4;
+ Width = 8;
+ break;
case AArch64::LDRWui:
case AArch64::LDRSui:
case AArch64::LDRSWui:
break;
}
- BaseReg = LdSt->getOperand(1).getReg();
- Offset = LdSt->getOperand(2).getImm() * Scale;
+ if (LdSt->getNumExplicitOperands() == 3) {
+ BaseReg = LdSt->getOperand(1).getReg();
+ Offset = LdSt->getOperand(2).getImm() * Scale;
+ } else {
+ assert(LdSt->getNumExplicitOperands() == 4 && "invalid number of operands");
+ BaseReg = LdSt->getOperand(2).getReg();
+ Offset = LdSt->getOperand(3).getImm() * Scale;
+ }
return true;
}
store double %b, double* %add.ptr, align 8
ret double %tmp
}
+
+; Check that the stores %c and %e are paired after the fadd instruction,
+; and that the stores %a and %b are then paired after proving that they do
+; not depend on the (%c, %e) pair.
+;
+; CHECK-LABEL: st1:
+; CHECK: stp q0, q1, [x{{[0-9]+}}]
+; CHECK: fadd
+; CHECK: stp q2, q0, [x{{[0-9]+}}, #32]
+define void @st1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %base, i64 %index) {
+entry:
+ %a0 = getelementptr inbounds float, float* %base, i64 %index
+ %b0 = getelementptr float, float* %a0, i64 4
+ %c0 = getelementptr float, float* %a0, i64 8
+ %d0 = getelementptr float, float* %a0, i64 12
+
+ %a1 = bitcast float* %a0 to <4 x float>*
+ %b1 = bitcast float* %b0 to <4 x float>*
+ %c1 = bitcast float* %c0 to <4 x float>*
+ %d1 = bitcast float* %d0 to <4 x float>*
+
+ store <4 x float> %c, <4 x float>* %c1, align 4
+ store <4 x float> %a, <4 x float>* %a1, align 4
+
+ ; This fadd forces the compiler to pair %c and %e after the fadd, leaving
+ ; the stores of %a and %b separated by that stp. The dependence analysis
+ ; then needs to prove that it is safe to move %b past the stp so it can be
+ ; paired with %a.
+ %e = fadd fast <4 x float> %d, %a
+
+ store <4 x float> %e, <4 x float>* %d1, align 4
+ store <4 x float> %b, <4 x float>* %b1, align 4
+
+ ret void
+}