}
}
+ // If we have a chain of two selects which share a true/false value and
+ // are controlled by two setcc nodes on the same operand, N can be folded
+ // to the inner select when the two conditions provably cannot collide:
+ //   select C0, (select C1, Y, Z), Y -> select C1, Y, Z   (needs C0 | C1)
+ //   select C0, Y, (select C1, Z, Y) -> select C1, Z, Y   (needs !(C0 & C1))
+ auto IsSelect = [](SDValue Op) {
+ return Op->getOpcode() == ISD::SELECT;
+ };
+ if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) {
+ // Compare two operands, treating equal-valued constants as identical.
+ auto AreSame = [](SDValue Op0, SDValue Op1) {
+ if (Op0 == Op1)
+ return true;
+ auto *C0 = dyn_cast<ConstantSDNode>(Op0);
+ auto *C1 = dyn_cast<ConstantSDNode>(Op1);
+ return C0 && C1 &&
+ APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
+ };
+
+ // Whether the inner select sits in N's true arm; this decides which
+ // relation between the two conditions makes the fold sound.
+ bool InnerIsTrueArm = IsSelect(N1);
+ SDValue OtherSelect = InnerIsTrueArm ? N1 : N2;
+ // The shared value pairs the inner select's opposite arm with N's other
+ // operand, so both selects agree whenever the outer condition picks the
+ // shared value.
+ bool SelectsShareOp = InnerIsTrueArm
+ ? AreSame(OtherSelect.getOperand(1), N2)
+ : AreSame(OtherSelect.getOperand(2), N1);
+
+ // True when the setcc pair provably satisfies the requirement of the
+ // form being folded: at least one true for the true-arm form, at most
+ // one true for the false-arm form.
+ auto FoldIsValid = [InnerIsTrueArm](SDValue SetCC0, SDValue SetCC1) {
+ if (SetCC0->getOpcode() != ISD::SETCC ||
+ SetCC1->getOpcode() != ISD::SETCC ||
+ SetCC0->getOperand(0) != SetCC1->getOperand(0))
+ return false;
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get();
+ auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1));
+ auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1));
+ if (!C0 || !C1)
+ return false;
+
+ bool AreInverse = ISD::getSetCCInverse(CC0, C0->getValueType(0)) == CC1;
+ bool ConstantsAreSame =
+ APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue());
+
+ // Inverse conditions on identical operands: exactly one holds, which
+ // satisfies both forms of the fold.
+ if (ConstantsAreSame && AreInverse)
+ return true;
+
+ // X == c0 and X == c1 with c0 != c1 can never both be true, but they
+ // CAN both be false, so this case only justifies the form where the
+ // inner select is N's false operand. Allowing it for the true-arm
+ // form would miscompile for X outside {c0, c1}.
+ if (!InnerIsTrueArm && !ConstantsAreSame && CC0 == ISD::SETEQ &&
+ CC1 == ISD::SETEQ)
+ return true;
+
+ return false;
+ };
+
+ if (SelectsShareOp && FoldIsValid(N0, OtherSelect.getOperand(0)))
+ return OtherSelect;
+ }
+
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
(!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+
+; The outer condition (ugt 40) is not the setcc inverse of the inner one
+; (eq 40): both can be false at once (e.g. %arg0 == 0), so the outer select
+; must be kept — expect two csels.
+define i64 @not_redundant_ugt_eq(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) {
+; CHECK-LABEL: not_redundant_ugt_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #40
+; CHECK-NEXT: csel x8, x1, x2, eq
+; CHECK-NEXT: csel x8, x8, x1, hi
+; CHECK-NEXT: sub x0, x8, x3
+; CHECK-NEXT: ret
+ %ugt = icmp ugt i64 %arg0, 40
+ %eq = icmp eq i64 %arg0, 40
+ %select = select i1 %eq, i64 %arg1, i64 %arg2
+ %select.1 = select i1 %ugt, i64 %select, i64 %arg1
+ %res = sub i64 %select.1, %arg3
+ ret i64 %res
+}
+; ugt 40 and ule 40 are exact inverse conditions on the same operands, so the
+; outer select always produces the same value as the inner one and is folded
+; away — expect a single csel.
+define i64 @redundant_ugt_ule(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) {
+; CHECK-LABEL: redundant_ugt_ule:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #41
+; CHECK-NEXT: csel x8, x1, x2, lo
+; CHECK-NEXT: sub x0, x8, x3
+; CHECK-NEXT: ret
+ %ugt = icmp ugt i64 %arg0, 40
+ %ule = icmp ule i64 %arg0, 40
+ %select = select i1 %ule, i64 %arg1, i64 %arg2
+ %select.1 = select i1 %ugt, i64 %select, i64 %arg1
+ %res = sub i64 %select.1, %arg3
+ ret i64 %res
+}
+; uge 40 is not the setcc inverse of ule 40 (both are true at %arg0 == 40),
+; so the combine does not recognize this pair and both compares/selects
+; remain in the output.
+define i64 @not_redundant_uge_ule(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) {
+; CHECK-LABEL: not_redundant_uge_ule:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #41
+; CHECK-NEXT: csel x8, x1, x2, lo
+; CHECK-NEXT: cmp x0, #39
+; CHECK-NEXT: csel x8, x8, x1, hi
+; CHECK-NEXT: sub x0, x8, x3
+; CHECK-NEXT: ret
+ %uge = icmp uge i64 %arg0, 40
+ %ule = icmp ule i64 %arg0, 40
+ %select = select i1 %ule, i64 %arg1, i64 %arg2
+ %select.1 = select i1 %uge, i64 %select, i64 %arg1
+ %res = sub i64 %select.1, %arg3
+ ret i64 %res
+}
+; Both conditions are the same 'eq' compare: they are neither inverse
+; conditions nor equalities against distinct constants, so no select is
+; removed — expect two csels.
+define i64 @not_redundant_eq_eq(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) {
+; CHECK-LABEL: not_redundant_eq_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #40
+; CHECK-NEXT: csel x8, x1, x2, eq
+; CHECK-NEXT: csel x8, x8, x1, eq
+; CHECK-NEXT: sub x0, x8, x3
+; CHECK-NEXT: ret
+ %eq.0 = icmp eq i64 %arg0, 40
+ %eq.1 = icmp eq i64 %arg0, 40
+ %select = select i1 %eq.1, i64 %arg1, i64 %arg2
+ %select.1 = select i1 %eq.0, i64 %select, i64 %arg1
+ %res = sub i64 %select.1, %arg3
+ ret i64 %res
+}
+; ne 40 and eq 40 are exact inverse conditions, so the outer select is
+; redundant and folded into the inner one — expect a single csel.
+define i64 @redundant_ne_eq(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) {
+; CHECK-LABEL: redundant_ne_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #40
+; CHECK-NEXT: csel x8, x1, x2, eq
+; CHECK-NEXT: sub x0, x8, x3
+; CHECK-NEXT: ret
+ %ne = icmp ne i64 %arg0, 40
+ %eq = icmp eq i64 %arg0, 40
+ %select = select i1 %eq, i64 %arg1, i64 %arg2
+ %select.1 = select i1 %ne, i64 %select, i64 %arg1
+ %res = sub i64 %select.1, %arg3
+ ret i64 %res
+}
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixunsdfti@plt
-; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a0, a1, a0
; CHECK-NOV-NEXT: snez a1, s1
; CHECK-V-NEXT: seqz a3, s1
; CHECK-V-NEXT: addi a3, a3, -1
; CHECK-V-NEXT: and a2, a3, a2
-; CHECK-V-NEXT: snez a3, a1
-; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a0, a3, a0
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a0, a1, a0
; CHECK-NOV-NEXT: snez a1, s1
; CHECK-V-NEXT: seqz a3, s1
; CHECK-V-NEXT: addi a3, a3, -1
; CHECK-V-NEXT: and a2, a3, a2
-; CHECK-V-NEXT: snez a3, a1
-; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a0, a3, a0
-; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: seqz a1, a1
+; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
; CHECK-V-NEXT: sd a0, 24(sp)
; CHECK-NOV-NEXT: mv a0, s0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
-; CHECK-NOV-NEXT: snez a2, a1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: seqz a1, a1
+; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
; CHECK-NOV-NEXT: and a0, a1, a0
; CHECK-NOV-NEXT: snez a1, s2
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s0, a0
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: snez a1, s2
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a1, a1, s1
-; CHECK-V-NEXT: addi s2, s2, -1
-; CHECK-V-NEXT: seqz a2, s2
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a1, a2, a1
-; CHECK-V-NEXT: sd a1, 8(sp)
-; CHECK-V-NEXT: sd a0, 0(sp)
-; CHECK-V-NEXT: addi a0, sp, 8
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vle64.v v9, (a0)
+; CHECK-V-NEXT: sd a1, 0(sp)
+; CHECK-V-NEXT: sd a0, 8(sp)
; CHECK-V-NEXT: mv a0, sp
+; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vle64.v v8, (a0)
+; CHECK-V-NEXT: addi a0, sp, 8
+; CHECK-V-NEXT: vle64.v v9, (a0)
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui double %x to i128
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui double %x to i128
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui float %x to i128
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui half %x to i128
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: # fallthrough-return
entry:
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: # fallthrough-return
entry:
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: i32.add
; CHECK-NEXT: global.set __stack_pointer
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 6
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 5
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i64.const 0
; CHECK-NEXT: local.get 3
; CHECK-NEXT: i64.eqz
; CHECK-NEXT: i64.select
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.const 1
-; CHECK-NEXT: i64.eq
-; CHECK-NEXT: i64.select
; CHECK-NEXT: i64x2.replace_lane 1
; CHECK-NEXT: # fallthrough-return
entry:
; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
; CHECK-NEXT: cmovneq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: movq %rbx, %xmm0
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
; CHECK-NEXT: cmovneq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: movq %rbx, %xmm0
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: testq %r14, %r14
; CHECK-NEXT: cmovneq %rcx, %rbx
-; CHECK-NEXT: cmpq $1, %r14
-; CHECK-NEXT: cmoveq %rcx, %rbx
; CHECK-NEXT: movq %rbx, %xmm0
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]