From ffb109b6852d248c9d2e3202477dccf20aac7151 Mon Sep 17 00:00:00 2001 From: zhongyunde Date: Fri, 11 Nov 2022 09:10:14 +0800 Subject: [PATCH] [AArch64][SVE] Support logical operation BIC with DestructiveBinary patterns Logical operation BIC with DestructiveBinary patterns is temporarily removed as causes an assert (commit 3c382ed71f15), so try to fix that. The most significant being that for pseudo instructions that do not have real instructions (including movpfx'd ones) that cover all combinations of register allocation, their expansion will be broken. This is the main reason the zeroing is an experimental feature because it has known bugs. So we add an extra LSL for movprfx expand BIC_ZPZZ_ZERO A, P, A, A when necessary. movprfx z0.s, p0/z, z0.s lsl z0.b, p0/m, z0.b, #0 bic z0.s, p0/m, z0.s, z0.s Depends on D88595 --- .../Target/AArch64/AArch64ExpandPseudoInsts.cpp | 37 +++++++++++------- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +- .../AArch64/sve-intrinsics-int-arith-merging.ll | 45 ++++++++++++++++++---- .../AArch64/sve-intrinsics-int-arith-merging.mir | 39 +++++++++++++++++++ 4 files changed, 101 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 62a0ae5..7bfb8c8 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -447,15 +447,11 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask; uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask; bool FalseZero = FalseLanes == AArch64::FalseLanesZero; - Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); - - if (DType == AArch64::DestructiveBinary) - assert(DstReg != MI.getOperand(3).getReg()); - bool UseRev = false; unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx; + switch (DType) { case AArch64::DestructiveBinaryComm: case AArch64::DestructiveBinaryCommWithRev: @@ -489,12 +485,13 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( llvm_unreachable("Unsupported Destructive Operand type"); } -#ifndef NDEBUG // MOVPRFX can only be used if the destination operand // is the destructive operand, not as any other operand, // so the Destructive Operand must be unique. bool DOPRegIsUnique = false; switch (DType) { + case AArch64::DestructiveBinary: + DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg(); case AArch64::DestructiveBinaryComm: case AArch64::DestructiveBinaryCommWithRev: DOPRegIsUnique = @@ -512,7 +509,6 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg()); break; } -#endif // Resolve the reverse opcode if (UseRev) { @@ -527,23 +523,27 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( // Get the right MOVPRFX uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode); - unsigned MovPrfx, MovPrfxZero; + unsigned MovPrfx, LSLZero, MovPrfxZero; switch (ElementSize) { case AArch64::ElementSizeNone: case AArch64::ElementSizeB: MovPrfx = AArch64::MOVPRFX_ZZ; + LSLZero = AArch64::LSL_ZPmI_B; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B; break; case AArch64::ElementSizeH: MovPrfx = AArch64::MOVPRFX_ZZ; + LSLZero = AArch64::LSL_ZPmI_H; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H; break; case AArch64::ElementSizeS: MovPrfx = AArch64::MOVPRFX_ZZ; + LSLZero = AArch64::LSL_ZPmI_S; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S; break; case AArch64::ElementSizeD: MovPrfx = AArch64::MOVPRFX_ZZ; + LSLZero = AArch64::LSL_ZPmI_D; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D; break; default: @@ -555,9 +555,10 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( // MachineInstrBuilder PRFX, DOP; if (FalseZero) { -#ifndef NDEBUG - assert(DOPRegIsUnique && "The destructive operand should be unique"); -#endif + // If we cannot prefix the requested instruction we'll instead emit a + // prefixed_zeroing_mov for DestructiveBinary. + assert((DOPRegIsUnique || AArch64::DestructiveBinary == DType) && + "The destructive operand should be unique"); assert(ElementSize != AArch64::ElementSizeNone && "This instruction is unpredicated"); @@ -569,10 +570,19 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( // After the movprfx, the destructive operand is same as Dst DOPIdx = 0; + + // Create the additional LSL to zero the lanes when the DstReg is not + // unique. Zeros the lanes in z0 that aren't active in p0 with sequence + // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0; + if (DType == AArch64::DestructiveBinary && !DOPRegIsUnique) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero)) + .addReg(DstReg, RegState::Define) + .add(MI.getOperand(PredIdx)) + .addReg(DstReg) + .addImm(0); + } } else if (DstReg != MI.getOperand(DOPIdx).getReg()) { -#ifndef NDEBUG assert(DOPRegIsUnique && "The destructive operand should be unique"); -#endif PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx)) .addReg(DstReg, RegState::Define) .addReg(MI.getOperand(DOPIdx).getReg()); @@ -591,6 +601,7 @@ bool AArch64ExpandPseudo::expand_DestructiveOp( .add(MI.getOperand(PredIdx)) .add(MI.getOperand(SrcIdx)); break; + case AArch64::DestructiveBinary: case AArch64::DestructiveBinaryImm: case AArch64::DestructiveBinaryComm: case AArch64::DestructiveBinaryCommWithRev: diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 43ea225..c076493 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -433,7 +433,7 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd; } // End HasSVEorSME, UseExperimentalZeroingPseudos let Predicates = [HasSVEorSME] in { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll index 9f13980..42c65a2 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll @@ -344,8 +344,7 @@ define @and_i64_zero( %pg, @bic_i8_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i8_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b +; CHECK-NEXT: movprfx z0.b, p0/z, z0.b ; CHECK-NEXT: bic z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -358,8 +357,7 @@ define @bic_i8_zero( %pg, @bic_i16_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i16_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h ; CHECK-NEXT: bic z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -372,8 +370,7 @@ define @bic_i16_zero( %pg, @bic_i32_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i32_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s ; CHECK-NEXT: bic z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -386,8 +383,7 @@ define @bic_i32_zero( %pg, @bic_i64_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d ; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -397,6 +393,39 @@ define @bic_i64_zero( %pg, %out } +; BIC (i.e. A & ~A) is illegal operation with movprfx, so the codegen depend on IR before expand-pseudo +define @bic_i64_zero_no_unique_reg( %pg, %a) { +; CHECK-LABEL: bic_i64_zero_no_unique_reg: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, z0.d +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, + %a_z, + %a_z) + ret %out +} + +; BIC (i.e. A & ~B) is not a commutative operation, so disable it when the +; destination operand is not the destructive operand +define @bic_i64_zero_no_comm( %pg, %a, %b) { +; CHECK-LABEL: bic_i64_zero_no_comm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: bic z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, + %b, + %a_z) + ret %out +} + declare @llvm.aarch64.sve.add.nxv16i8(, , ) declare @llvm.aarch64.sve.add.nxv8i16(, , ) declare @llvm.aarch64.sve.add.nxv4i32(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir new file mode 100644 index 0000000..da40014 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+use-experimental-zeroing-pseudos -run-pass=aarch64-expand-pseudo %s -o - | FileCheck %s + +# Should create an additional LSL to zero the lanes as the DstReg is not unique + +--- | + define @bic_i16_zero( %pg, %a){ + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.bic.nxv8i16( %pg, %a_z, %a_z) + ret %out + } + + declare @llvm.aarch64.sve.bic.nxv8i16(, , ) +... +--- +name: bic_i16_zero +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$p0', virtual-reg: '' } + - { reg: '$z0', virtual-reg: '' } +body: | + bb.0 (%ir-block.0): + liveins: $p0, $z0 + + ; CHECK-LABEL: name: bic_i16_zero + ; CHECK: liveins: $p0, $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $z0_hi, implicit killed $p0, implicit $z0 { + ; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_H $p0, $z0 + ; CHECK-NEXT: $z0 = LSL_ZPmI_H killed renamable $p0, internal $z0, 0 + ; CHECK-NEXT: $z0 = BIC_ZPmZ_H killed renamable $p0, internal killed $z0, internal killed renamable $z0 + ; CHECK-NEXT: } + ; CHECK-NEXT: RET undef $lr, implicit $z0 + renamable $z0 = BIC_ZPZZ_ZERO_H killed renamable $p0, killed renamable $z0, killed renamable $z0 + RET_ReallyLR implicit $z0 +... -- 2.7.4