From: Jessica Paquette
Date: Thu, 3 Dec 2020 22:31:43 +0000 (-0800)
Subject: [AArch64][GlobalISel] Select G_SADDO and G_SSUBO
X-Git-Tag: llvmorg-13-init~4034
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b15491eb333809e907e51d1b05da2a6a6344a427;p=platform%2Fupstream%2Fllvm.git

[AArch64][GlobalISel] Select G_SADDO and G_SSUBO

We didn't have selector support for these.

Selection code is similar to `getAArch64XALUOOp` in AArch64ISelLowering.
Similar to that code, this returns the AArch64CC and the instruction produced.
In SDAG, this is used to optimize select + overflow and conditional branch +
overflow pairs. (See `AArch64TargetLowering::LowerBR_CC` and
`AArch64TargetLowering::LowerSELECT`)

(G_USUBO should be easy to add here, but it isn't legalized right now.)

This also factors out the existing G_UADDO selection code, and removes an
unnecessary check for s32/s64. AFAIK, we shouldn't ever get anything other
than s32/s64. It makes more sense for this to be handled by the type
assertion in `emitAddSub`.

Differential Revision: https://reviews.llvm.org/D92610
---
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8d75acd..f23743b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -258,6 +258,14 @@ private:
   MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                 MachineIRBuilder &MIRBuilder) const;
 
+  /// Emit the overflow op for \p Opcode.
+  ///
+  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
+  /// G_USUBO, etc.
+  std::pair<MachineInstr *, AArch64CC::CondCode>
+  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
+                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+
   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
   /// \p IsNegative is true if the test should be "not zero".
   /// This will also optimize the test bit instruction when possible.
@@ -2672,35 +2680,23 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     I.eraseFromParent();
     return true;
   }
-  case TargetOpcode::G_UADDO: {
-    // TODO: Support other types.
-    unsigned OpSize = Ty.getSizeInBits();
-    if (OpSize != 32 && OpSize != 64) {
-      LLVM_DEBUG(
-          dbgs()
-          << "G_UADDO currently only supported for 32 and 64 b types.\n");
-      return false;
-    }
-
-    // TODO: Support vectors.
-    if (Ty.isVector()) {
-      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
-      return false;
-    }
-
-    // Add and set the set condition flag.
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_SSUBO: {
+    // Emit the operation and get the correct condition code.
     MachineIRBuilder MIRBuilder(I);
-    emitADDS(I.getOperand(0).getReg(), I.getOperand(2), I.getOperand(3),
-             MIRBuilder);
+    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
+                                  I.getOperand(2), I.getOperand(3), MIRBuilder);
 
     // Now, put the overflow result in the register given by the first operand
-    // to the G_UADDO. CSINC increments the result when the predicate is false,
-    // so to get the increment when it's true, we need to use the inverse. In
-    // this case, we want to increment when carry is set.
+    // to the overflow op. CSINC increments the result when the predicate is
+    // false, so to get the increment when it's true, we need to use the
+    // inverse. In this case, we want to increment when carry is set.
+    Register ZReg = AArch64::WZR;
     auto CsetMI = MIRBuilder
                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
-                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
-                      .addImm(getInvertedCondCode(AArch64CC::HS));
+                                  {ZReg, ZReg})
+                      .addImm(getInvertedCondCode(OpAndCC.second));
     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
     I.eraseFromParent();
     return true;
   }
@@ -4287,6 +4283,23 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
   return &*I;
 }
 
+std::pair<MachineInstr *, AArch64CC::CondCode>
+AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
+                                           MachineOperand &LHS,
+                                           MachineOperand &RHS,
+                                           MachineIRBuilder &MIRBuilder) const {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  case TargetOpcode::G_SADDO:
+    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  case TargetOpcode::G_UADDO:
+    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+  case TargetOpcode::G_SSUBO:
+    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  }
+}
+
 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
   MachineIRBuilder MIB(I);
   MachineRegisterInfo &MRI = *MIB.getMRI();
diff --git a/llvm/lib/Target/AArch64/GISel/select-saddo.mir b/llvm/lib/Target/AArch64/GISel/select-saddo.mir
new file mode 100644
index 0000000..6f05bd7
--- /dev/null
+++ b/llvm/lib/Target/AArch64/GISel/select-saddo.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name: saddo_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    ; CHECK-LABEL: name: saddo_s32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %saddo:gpr32 = ADDSWrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %saddo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %saddo:gpr(s32), %4:gpr(s1) = G_SADDO %reg0, %reg1
+    $w0 = COPY %saddo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: saddo_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %saddo:gpr64 = ADDSXrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $x0 = COPY %saddo
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s64) = COPY $x1
+    %saddo:gpr(s64), %4:gpr(s1) = G_SADDO %reg0, %reg1
+    $x0 = COPY %saddo(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: saddo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get ADDSWri when we can fold in a constant.
+ ; + ; CHECK-LABEL: name: saddo_s32_imm + ; CHECK: liveins: $w0, $w1, $x2 + ; CHECK: %copy:gpr32sp = COPY $w0 + ; CHECK: %saddo:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv + ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK: $w0 = COPY %saddo + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %constant:gpr(s32) = G_CONSTANT i32 16 + %saddo:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant + $w0 = COPY %saddo(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: saddo_s32_shifted +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $w0, $w1, $x2 + ; Check that we get ADDSWrs when we can fold in a shift. + ; + ; CHECK-LABEL: name: saddo_s32_shifted + ; CHECK: liveins: $w0, $w1, $x2 + ; CHECK: %reg0:gpr32 = COPY $w0 + ; CHECK: %reg1:gpr32 = COPY $w1 + ; CHECK: %add:gpr32 = ADDSWrs %reg0, %reg1, 16, implicit-def $nzcv + ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK: $w0 = COPY %add + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s32) = COPY $w0 + %reg1:gpr(s32) = COPY $w1 + %constant:gpr(s32) = G_CONSTANT i32 16 + %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32) + %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %reg0, %shift + $w0 = COPY %add(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: saddo_s32_neg_imm +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $w0, $w1, $x2 + ; Check that we get SUBSWri when we can fold in a negative constant. + ; + ; CHECK-LABEL: name: saddo_s32_neg_imm + ; CHECK: liveins: $w0, $w1, $x2 + ; CHECK: %copy:gpr32sp = COPY $w0 + ; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv + ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK: $w0 = COPY %add + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %constant:gpr(s32) = G_CONSTANT i32 -16 + %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant + $w0 = COPY %add(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: saddo_arith_extended +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $w0, $x0 + ; Check that we get ADDSXrx. + ; CHECK-LABEL: name: saddo_arith_extended + ; CHECK: liveins: $w0, $x0 + ; CHECK: %reg0:gpr64sp = COPY $x0 + ; CHECK: %reg1:gpr32 = COPY $w0 + ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv + ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK: $x0 = COPY %add + ; CHECK: RET_ReallyLR implicit $x0 + %reg0:gpr(s64) = COPY $x0 + %reg1:gpr(s32) = COPY $w0 + %ext:gpr(s64) = G_ZEXT %reg1(s32) + %cst:gpr(s64) = G_CONSTANT i64 2 + %shift:gpr(s64) = G_SHL %ext, %cst(s64) + %add:gpr(s64), %flags:gpr(s1) = G_SADDO %reg0, %shift + $x0 = COPY %add(s64) + RET_ReallyLR implicit $x0 diff --git a/llvm/lib/Target/AArch64/GISel/select-ssubo.mir b/llvm/lib/Target/AArch64/GISel/select-ssubo.mir new file mode 100644 index 0000000..f6b1794 --- /dev/null +++ b/llvm/lib/Target/AArch64/GISel/select-ssubo.mir @@ -0,0 +1,158 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s + +... 
+---
+name: ssubo_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    ; CHECK-LABEL: name: ssubo_s32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %ssubo:gpr32 = SUBSWrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %ssubo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %ssubo:gpr(s32), %4:gpr(s1) = G_SSUBO %reg0, %reg1
+    $w0 = COPY %ssubo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: ssubo_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %ssubo:gpr64 = SUBSXrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $x0 = COPY %ssubo
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s64) = COPY $x1
+    %ssubo:gpr(s64), %4:gpr(s1) = G_SSUBO %reg0, %reg1
+    $x0 = COPY %ssubo(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: ssubo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get SUBSWri when we can fold in a constant.
+    ;
+    ; CHECK-LABEL: name: ssubo_s32_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %ssubo:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %ssubo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %ssubo:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
+    $w0 = COPY %ssubo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s32_shifted
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get SUBSWrs when we can fold in a shift.
+    ;
+    ; CHECK-LABEL: name: ssubo_s32_shifted
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %sub:gpr32 = SUBSWrs %reg0, %reg1, 16, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %sub
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
+    %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %reg0, %shift
+    $w0 = COPY %sub(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s32_neg_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get ADDSWri when we can fold in a negative constant.
+ ; + ; CHECK-LABEL: name: ssubo_s32_neg_imm + ; CHECK: liveins: $w0, $w1, $x2 + ; CHECK: %copy:gpr32sp = COPY $w0 + ; CHECK: %sub:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv + ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK: $w0 = COPY %sub + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %constant:gpr(s32) = G_CONSTANT i32 -16 + %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant + $w0 = COPY %sub(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: ssubo_arith_extended +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $w0, $x0 + ; Check that we get SUBSXrx. + ; CHECK-LABEL: name: ssubo_arith_extended + ; CHECK: liveins: $w0, $x0 + ; CHECK: %reg0:gpr64sp = COPY $x0 + ; CHECK: %reg1:gpr32 = COPY $w0 + ; CHECK: %sub:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv + ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK: $x0 = COPY %sub + ; CHECK: RET_ReallyLR implicit $x0 + %reg0:gpr(s64) = COPY $x0 + %reg1:gpr(s32) = COPY $w0 + %ext:gpr(s64) = G_ZEXT %reg1(s32) + %cst:gpr(s64) = G_CONSTANT i64 2 + %shift:gpr(s64) = G_SHL %ext, %cst(s64) + %sub:gpr(s64), %flags:gpr(s1) = G_SSUBO %reg0, %shift + $x0 = COPY %sub(s64) + RET_ReallyLR implicit $x0