return true;
}
-static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI) {
- // ToDo: check and add more MIs which set zero for high 64bits.
- switch (MI->getOpcode()) {
- default:
- break;
- case AArch64::FCVTNv2i32:
- case AArch64::FCVTNv4i16:
- case AArch64::RSHRNv2i32_shift:
- case AArch64::RSHRNv4i16_shift:
- case AArch64::RSHRNv8i8_shift :
- case AArch64::SHRNv2i32_shift:
- case AArch64::SHRNv4i16_shift:
- case AArch64::SHRNv8i8_shift:
- return true;
- }
-
- return false;
+// All AArch64 instructions that write an FPR64 implicitly zero the high 64
+// bits of the overlapping 128-bit register.
+static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
+ MachineRegisterInfo *MRI) {
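+ // Operand 0 must be a register def. isReg() has to be checked before
+ // isDef(), which asserts on non-register operands.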
+ if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
+ if (RC != &AArch64::FPR64RegClass)
+ return false;
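+ // Target-independent opcodes (COPY, INSERT_SUBREG, the G_* generics) all
+ // sit at or below GENERIC_OP_END; only a genuine AArch64 instruction is
+ // known to zero the high bits when it writes an FPR64.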
+ return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}
bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
return false;
Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
- if (!is64bitDefwithZeroHigh64bit(Low64MI))
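+ // getUniqueVRegDef returns null when the register has no unique definition.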
+ if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
return false;
// Check that there is a `mov 0` MI for the high 64 bits.
// %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
// %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
- if (High64MI->getOpcode() != AArch64::INSERT_SUBREG)
+ if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
return false;
High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
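+ // Look through an intervening COPY to reach the defining instruction.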
- if (High64MI->getOpcode() == TargetOpcode::COPY)
+ if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
- if (High64MI->getOpcode() != AArch64::MOVID &&
- High64MI->getOpcode() != AArch64::MOVIv2d_ns)
+ if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
+ High64MI->getOpcode() != AArch64::MOVIv2d_ns))
return false;
if (High64MI->getOperand(1).getImm() != 0)
return false;
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: ldr q0, [x2]
; CHECK-NEXT: cset w8, gt
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: cmeq v1.8h, v1.8h, #0
; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.8b, v0.8h
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
%tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
; CHECK-LABEL: addpv4i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: addp v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
entry:
%vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
; CHECK-LABEL: addv4i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
entry:
%add.i = add <4 x i16> %b, %a
define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: tbl1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: ret
entry:
%vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
; CHECK-LABEL: bsl:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: mov v0.d[1], v3.d[0]
; CHECK-NEXT: ret
entry:
%vbsl3.i = and <4 x i16> %c, %a
define <16 x i8> @load(ptr %a, <8 x i8> %b) {
; CHECK-LABEL: load:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
%vtbl11 = load <8 x i8>, ptr %a
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[COPY2]], %subreg.dsub
; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[FADDDrr]], %subreg.dsub
- ; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG1]], 1, killed [[INSERT_SUBREG]], 0
- ; CHECK-NEXT: $q0 = COPY [[INSvi64lane]]
+ ; CHECK-NEXT: $q0 = COPY [[INSERT_SUBREG1]]
; CHECK-NEXT: RET_ReallyLR implicit $q0
%1:fpr64 = COPY $d1
%0:fpr64 = COPY $d0