uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
-
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
-
- if (DType == AArch64::DestructiveBinary)
- assert(DstReg != MI.getOperand(3).getReg());
-
bool UseRev = false;
unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
+
switch (DType) {
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
llvm_unreachable("Unsupported Destructive Operand type");
}
-#ifndef NDEBUG
// MOVPRFX can only be used if the destination operand
// is the destructive operand, not as any other operand,
// so the Destructive Operand must be unique.
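// For illustration (not from this patch): after "movprfx z0, z1" the
// prefixed instruction must write z0 and may only read z0 as its
// destructive operand, e.g.
//   movprfx z0, z1 ; add z0.b, p0/m, z0.b, z2.b   is valid, whereas
//   movprfx z0, z1 ; add z0.b, p0/m, z0.b, z0.b   is not.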
bool DOPRegIsUnique = false;
switch (DType) {
+ case AArch64::DestructiveBinary:
+   DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
+   break;
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
DOPRegIsUnique =
    DstReg != MI.getOperand(DOPIdx).getReg() ||
    MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
break;
}
-#endif
// Resolve the reverse opcode
if (UseRev) {
// Get the right MOVPRFX
uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
- unsigned MovPrfx, MovPrfxZero;
+ unsigned MovPrfx, LSLZero, MovPrfxZero;
switch (ElementSize) {
case AArch64::ElementSizeNone:
case AArch64::ElementSizeB:
MovPrfx = AArch64::MOVPRFX_ZZ;
+ LSLZero = AArch64::LSL_ZPmI_B;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
break;
case AArch64::ElementSizeH:
MovPrfx = AArch64::MOVPRFX_ZZ;
+ LSLZero = AArch64::LSL_ZPmI_H;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
break;
case AArch64::ElementSizeS:
MovPrfx = AArch64::MOVPRFX_ZZ;
+ LSLZero = AArch64::LSL_ZPmI_S;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
break;
case AArch64::ElementSizeD:
MovPrfx = AArch64::MOVPRFX_ZZ;
+ LSLZero = AArch64::LSL_ZPmI_D;
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
break;
default:
  llvm_unreachable("Unsupported ElementSize");
}

// Create the destructive operation (if required)
MachineInstrBuilder PRFX, DOP;
if (FalseZero) {
-#ifndef NDEBUG
- assert(DOPRegIsUnique && "The destructive operand should be unique");
-#endif
+ // If we cannot prefix the requested instruction we'll instead emit a
+ // prefixed_zeroing_mov for DestructiveBinary.
+ assert((DOPRegIsUnique || AArch64::DestructiveBinary == DType) &&
+ "The destructive operand should be unique");
assert(ElementSize != AArch64::ElementSizeNone &&
"This instruction is unpredicated");
// After the movprfx, the destructive operand is same as Dst
DOPIdx = 0;
+
+ // Create the additional LSL to zero the lanes when the DstReg is not
+ // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
+ // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
+ if (DType == AArch64::DestructiveBinary && !DOPRegIsUnique) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
+ .addReg(DstReg, RegState::Define)
+ .add(MI.getOperand(PredIdx))
+ .addReg(DstReg)
+ .addImm(0);
+ }
} else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
-#ifndef NDEBUG
assert(DOPRegIsUnique && "The destructive operand should be unique");
-#endif
PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
.addReg(DstReg, RegState::Define)
.addReg(MI.getOperand(DOPIdx).getReg());
.add(MI.getOperand(PredIdx))
.add(MI.getOperand(SrcIdx));
break;
+ case AArch64::DestructiveBinary:
case AArch64::DestructiveBinaryImm:
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_orr>;
defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_eor>;
defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_and>;
- defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd<null_frag>;
+ defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_bic>;
} // End HasSVEorSME, UseExperimentalZeroingPseudos
let Predicates = [HasSVEorSME] in {
define <vscale x 16 x i8> @bic_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: bic_i8_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.b, #0 // =0x0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b
+; CHECK-NEXT: movprfx z0.b, p0/z, z0.b
; CHECK-NEXT: bic z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
%a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: bic_i16_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.h, #0 // =0x0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT: movprfx z0.h, p0/z, z0.h
; CHECK-NEXT: bic z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
define <vscale x 4 x i32> @bic_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bic_i32_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.s, #0 // =0x0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: bic z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
define <vscale x 2 x i64> @bic_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: bic_i64_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, #0 // =0x0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
ret <vscale x 2 x i64> %out
}
+; BIC (i.e. A & ~A) is an illegal operation with movprfx, so the generated code depends on the IR before expand-pseudo.
+define <vscale x 2 x i64> @bic_i64_zero_no_unique_reg(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: bic_i64_zero_no_unique_reg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p0/m, z0.d
+; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a_z,
+ <vscale x 2 x i64> %a_z)
+ ret <vscale x 2 x i64> %out
+}
+
+; BIC (i.e. A & ~B) is not a commutative operation, so the movprfx transform is
+; disabled when the destination operand is not the destructive operand.
+define <vscale x 2 x i64> @bic_i64_zero_no_comm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: bic_i64_zero_no_comm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, #0 // =0x0
+; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT: bic z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a_z)
+ ret <vscale x 2 x i64> %out
+}
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+use-experimental-zeroing-pseudos -run-pass=aarch64-expand-pseudo %s -o - | FileCheck %s
+
+# Should create an additional LSL to zero the lanes as the DstReg is not unique
+
+--- |
+ define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a){
+ %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a_z, <vscale x 8 x i16> %a_z)
+ ret <vscale x 8 x i16> %out
+ }
+
+ declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+...
+---
+name: bic_i16_zero
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+registers: []
+liveins:
+ - { reg: '$p0', virtual-reg: '' }
+ - { reg: '$z0', virtual-reg: '' }
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $p0, $z0
+
+ ; CHECK-LABEL: name: bic_i16_zero
+ ; CHECK: liveins: $p0, $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $z0_hi, implicit killed $p0, implicit $z0 {
+ ; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_H $p0, $z0
+ ; CHECK-NEXT: $z0 = LSL_ZPmI_H killed renamable $p0, internal $z0, 0
+ ; CHECK-NEXT: $z0 = BIC_ZPmZ_H killed renamable $p0, internal killed $z0, internal killed renamable $z0
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: RET undef $lr, implicit $z0
+ renamable $z0 = BIC_ZPZZ_ZERO_H killed renamable $p0, killed renamable $z0, killed renamable $z0
+ RET_ReallyLR implicit $z0
+...
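
For reference, a minimal usage sketch (not part of this patch; it assumes the
ACLE header arm_sve.h and the svbic_u16_z intrinsic): the zeroing form of the
intrinsic is expected to be lowered to the select-zero + @llvm.aarch64.sve.bic
pattern exercised by the tests above, which the expand-pseudo pass can then
turn into the movprfx/bic (or movprfx/lsl/bic) sequence when
+use-experimental-zeroing-pseudos is enabled.

#include <arm_sve.h>

// Zeroing BIC: inactive lanes of the result are zeroed by the _z form.
svuint16_t bic_zeroing(svbool_t pg, svuint16_t a, svuint16_t b) {
  return svbic_u16_z(pg, a, b);
}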