assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
&& "If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
- case ISD::SETONE:
- case ISD::SETUEQ:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
}
; FUNC-LABEL: {{^}}f32_one:
-; R600-DAG: SETE_DX10
-; R600-DAG: SETE_DX10
-; R600-DAG: AND_INT
-; R600-DAG: SETNE_DX10
-; R600-DAG: AND_INT
+; R600-DAG: SETGT_DX10
+; R600-DAG: SETGT_DX10
+; R600-DAG: OR_INT
; R600-DAG: SETNE_INT
; GCN: v_cmp_lg_f32_e32 vcc
}
; FUNC-LABEL: {{^}}f32_ueq:
-; R600-DAG: SETNE_DX10
-; R600-DAG: SETNE_DX10
-; R600-DAG: OR_INT
-; R600-DAG: SETE_DX10
+; R600-DAG: SETGT_DX10
+; R600-DAG: SETGT_DX10
; R600-DAG: OR_INT
-; R600-DAG: SETNE_INT
+; R600-DAG: SETE_INT
; GCN: v_cmp_nlg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define i1 @test_fcmpueq(float %a, float %b) #0 {
; CHECK-LABEL: test_fcmpueq:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: efscmpeq 0, 3, 3
-; CHECK-NEXT: efscmpeq 1, 4, 4
-; CHECK-NEXT: crnand 20, 5, 1
-; CHECK-NEXT: efscmpeq 0, 3, 4
+; CHECK-NEXT: efscmpgt 0, 3, 4
+; CHECK-NEXT: efscmplt 1, 3, 4
; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: crnor 20, 1, 20
+; CHECK-NEXT: cror 20, 5, 1
; CHECK-NEXT: bc 12, 20, .LBB14_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: ori 3, 5, 0
define i1 @test_fcmpne(float %a, float %b) #0 {
; CHECK-LABEL: test_fcmpne:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: efscmpeq 0, 4, 4
-; CHECK-NEXT: efscmpeq 1, 3, 3
-; CHECK-NEXT: crand 20, 5, 1
-; CHECK-NEXT: efscmpeq 0, 3, 4
+; CHECK-NEXT: efscmplt 0, 3, 4
+; CHECK-NEXT: efscmpgt 1, 3, 4
; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: crorc 20, 1, 20
+; CHECK-NEXT: crnor 20, 5, 1
; CHECK-NEXT: bc 12, 20, .LBB15_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: ori 3, 5, 0
; SPE-LABEL: test_dcmpueq:
; SPE: # %bb.0: # %entry
; SPE-NEXT: stwu 1, -16(1)
+; SPE-NEXT: evmergelo 5, 5, 6
; SPE-NEXT: evmergelo 3, 3, 4
-; SPE-NEXT: evmergelo 4, 5, 6
-; SPE-NEXT: efdcmpeq 0, 4, 4
-; SPE-NEXT: bc 4, 1, .LBB16_4
+; SPE-NEXT: efdcmplt 0, 3, 5
+; SPE-NEXT: bc 12, 1, .LBB16_3
; SPE-NEXT: # %bb.1: # %entry
-; SPE-NEXT: efdcmpeq 0, 3, 3
-; SPE-NEXT: bc 4, 1, .LBB16_4
-; SPE-NEXT: # %bb.2: # %entry
-; SPE-NEXT: efdcmpeq 0, 3, 4
-; SPE-NEXT: bc 12, 1, .LBB16_4
-; SPE-NEXT: # %bb.3: # %fa
-; SPE-NEXT: li 3, 0
-; SPE-NEXT: b .LBB16_5
-; SPE-NEXT: .LBB16_4: # %tr
+; SPE-NEXT: efdcmpgt 0, 3, 5
+; SPE-NEXT: bc 12, 1, .LBB16_3
+; SPE-NEXT: # %bb.2: # %tr
; SPE-NEXT: li 3, 1
-; SPE-NEXT: .LBB16_5: # %ret
+; SPE-NEXT: b .LBB16_4
+; SPE-NEXT: .LBB16_3: # %fa
+; SPE-NEXT: li 3, 0
+; SPE-NEXT: .LBB16_4: # %ret
; SPE-NEXT: stw 3, 12(1)
; SPE-NEXT: lwz 3, 12(1)
; SPE-NEXT: addi 1, 1, 16
define i1 @test_dcmpne(double %a, double %b) #0 {
; SPE-LABEL: test_dcmpne:
; SPE: # %bb.0: # %entry
+; SPE-NEXT: evmergelo 5, 5, 6
; SPE-NEXT: evmergelo 3, 3, 4
-; SPE-NEXT: evmergelo 4, 5, 6
; SPE-NEXT: li 7, 1
-; SPE-NEXT: efdcmpeq 0, 4, 4
-; SPE-NEXT: efdcmpeq 1, 3, 3
-; SPE-NEXT: efdcmpeq 5, 3, 4
-; SPE-NEXT: crand 24, 5, 1
-; SPE-NEXT: crorc 20, 21, 24
+; SPE-NEXT: efdcmplt 0, 3, 5
+; SPE-NEXT: efdcmpgt 1, 3, 5
+; SPE-NEXT: crnor 20, 5, 1
; SPE-NEXT: bc 12, 20, .LBB17_2
; SPE-NEXT: # %bb.1: # %entry
; SPE-NEXT: ori 3, 7, 0
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test22:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvcmpeqsp vs0, v5, v5
-; CHECK-NEXT: xvcmpeqsp vs1, v4, v4
-; CHECK-NEXT: xvcmpeqsp vs2, v4, v5
-; CHECK-NEXT: xxlnor vs0, vs0, vs0
-; CHECK-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-NEXT: xxlor vs0, vs1, vs0
-; CHECK-NEXT: xxlor vs0, vs2, vs0
+; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
+; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
+; CHECK-NEXT: xxlnor vs0, vs1, vs0
; CHECK-NEXT: xxsel v2, v3, v2, vs0
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test22:
; CHECK-REG: # %bb.0: # %entry
-; CHECK-REG-NEXT: xvcmpeqsp vs0, v5, v5
-; CHECK-REG-NEXT: xvcmpeqsp vs1, v4, v4
-; CHECK-REG-NEXT: xvcmpeqsp vs2, v4, v5
-; CHECK-REG-NEXT: xxlnor vs0, vs0, vs0
-; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
-; CHECK-REG-NEXT: xxlor vs0, vs2, vs0
+; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
+; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
+; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0
; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test22:
; CHECK-FISL: # %bb.0: # %entry
-; CHECK-FISL-NEXT: xvcmpeqsp vs0, v4, v5
-; CHECK-FISL-NEXT: xvcmpeqsp vs1, v5, v5
-; CHECK-FISL-NEXT: xxlnor vs2, vs1, vs1
-; CHECK-FISL-NEXT: xvcmpeqsp vs1, v4, v4
-; CHECK-FISL-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-FISL-NEXT: xxlor vs1, vs1, vs2
-; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
+; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
+; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
+; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1
; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test22:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xvcmpeqsp vs0, v5, v5
-; CHECK-LE-NEXT: xvcmpeqsp vs1, v4, v4
-; CHECK-LE-NEXT: xvcmpeqsp vs2, v4, v5
-; CHECK-LE-NEXT: xxlnor vs0, vs0, vs0
-; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1
-; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
-; CHECK-LE-NEXT: xxlor vs0, vs2, vs0
+; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
+; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
+; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0
; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
; CHECK-LE-NEXT: blr
entry:
unreachable
}
-; TODO: feq.s+sltiu+bne -> feq.s+beq
define void @br_fcmp_one(double %a, double %b) nounwind {
; RV32IFD-LABEL: br_fcmp_one:
; RV32IFD: # %bb.0:
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw a0, 0(sp)
-; RV32IFD-NEXT: sw a1, 4(sp)
-; RV32IFD-NEXT: fld ft0, 0(sp)
; RV32IFD-NEXT: sw a2, 0(sp)
; RV32IFD-NEXT: sw a3, 4(sp)
+; RV32IFD-NEXT: fld ft0, 0(sp)
+; RV32IFD-NEXT: sw a0, 0(sp)
+; RV32IFD-NEXT: sw a1, 4(sp)
; RV32IFD-NEXT: fld ft1, 0(sp)
-; RV32IFD-NEXT: feq.d a0, ft1, ft1
-; RV32IFD-NEXT: feq.d a1, ft0, ft0
-; RV32IFD-NEXT: and a0, a1, a0
-; RV32IFD-NEXT: feq.d a1, ft0, ft1
-; RV32IFD-NEXT: not a1, a1
-; RV32IFD-NEXT: and a0, a1, a0
+; RV32IFD-NEXT: flt.d a0, ft1, ft0
+; RV32IFD-NEXT: flt.d a1, ft0, ft1
+; RV32IFD-NEXT: or a0, a1, a0
; RV32IFD-NEXT: bnez a0, .LBB7_2
; RV32IFD-NEXT: # %bb.1: # %if.else
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: addi sp, sp, -16
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fmv.d.x ft1, a1
-; RV64IFD-NEXT: feq.d a0, ft1, ft1
-; RV64IFD-NEXT: feq.d a1, ft0, ft0
-; RV64IFD-NEXT: and a0, a1, a0
-; RV64IFD-NEXT: feq.d a1, ft0, ft1
-; RV64IFD-NEXT: not a1, a1
-; RV64IFD-NEXT: and a0, a1, a0
+; RV64IFD-NEXT: fmv.d.x ft0, a1
+; RV64IFD-NEXT: fmv.d.x ft1, a0
+; RV64IFD-NEXT: flt.d a0, ft1, ft0
+; RV64IFD-NEXT: flt.d a1, ft0, ft1
+; RV64IFD-NEXT: or a0, a1, a0
; RV64IFD-NEXT: bnez a0, .LBB7_2
; RV64IFD-NEXT: # %bb.1: # %if.else
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: sw a0, 0(sp)
; RV32IFD-NEXT: sw a1, 4(sp)
; RV32IFD-NEXT: fld ft1, 0(sp)
-; RV32IFD-NEXT: feq.d a0, ft1, ft0
-; RV32IFD-NEXT: feq.d a1, ft0, ft0
-; RV32IFD-NEXT: feq.d a2, ft1, ft1
-; RV32IFD-NEXT: and a1, a2, a1
-; RV32IFD-NEXT: xori a1, a1, 1
-; RV32IFD-NEXT: or a0, a0, a1
-; RV32IFD-NEXT: bnez a0, .LBB9_2
+; RV32IFD-NEXT: flt.d a0, ft1, ft0
+; RV32IFD-NEXT: flt.d a1, ft0, ft1
+; RV32IFD-NEXT: or a0, a1, a0
+; RV32IFD-NEXT: addi a1, zero, 1
+; RV32IFD-NEXT: bne a0, a1, .LBB9_2
; RV32IFD-NEXT: # %bb.1: # %if.else
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: fmv.d.x ft0, a1
; RV64IFD-NEXT: fmv.d.x ft1, a0
-; RV64IFD-NEXT: feq.d a0, ft1, ft0
-; RV64IFD-NEXT: feq.d a1, ft0, ft0
-; RV64IFD-NEXT: feq.d a2, ft1, ft1
-; RV64IFD-NEXT: and a1, a2, a1
-; RV64IFD-NEXT: xori a1, a1, 1
-; RV64IFD-NEXT: or a0, a0, a1
-; RV64IFD-NEXT: bnez a0, .LBB9_2
+; RV64IFD-NEXT: flt.d a0, ft1, ft0
+; RV64IFD-NEXT: flt.d a1, ft0, ft1
+; RV64IFD-NEXT: or a0, a1, a0
+; RV64IFD-NEXT: addi a1, zero, 1
+; RV64IFD-NEXT: bne a0, a1, .LBB9_2
; RV64IFD-NEXT: # %bb.1: # %if.else
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
; RV32IFD-LABEL: fcmp_one:
; RV32IFD: # %bb.0:
; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
; RV32IFD-NEXT: sw a2, 8(sp)
; RV32IFD-NEXT: sw a3, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft1, ft1
-; RV32IFD-NEXT: feq.d a1, ft0, ft0
-; RV32IFD-NEXT: and a0, a1, a0
-; RV32IFD-NEXT: feq.d a1, ft0, ft1
-; RV32IFD-NEXT: not a1, a1
-; RV32IFD-NEXT: and a0, a1, a0
+; RV32IFD-NEXT: flt.d a0, ft1, ft0
+; RV32IFD-NEXT: flt.d a1, ft0, ft1
+; RV32IFD-NEXT: or a0, a1, a0
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcmp_one:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fmv.d.x ft1, a1
-; RV64IFD-NEXT: feq.d a0, ft1, ft1
-; RV64IFD-NEXT: feq.d a1, ft0, ft0
-; RV64IFD-NEXT: and a0, a1, a0
-; RV64IFD-NEXT: feq.d a1, ft0, ft1
-; RV64IFD-NEXT: not a1, a1
-; RV64IFD-NEXT: and a0, a1, a0
+; RV64IFD-NEXT: fmv.d.x ft0, a1
+; RV64IFD-NEXT: fmv.d.x ft1, a0
+; RV64IFD-NEXT: flt.d a0, ft1, ft0
+; RV64IFD-NEXT: flt.d a1, ft0, ft1
+; RV64IFD-NEXT: or a0, a1, a0
; RV64IFD-NEXT: ret
%1 = fcmp one double %a, %b
%2 = zext i1 %1 to i32
; RV32IFD-NEXT: sw a0, 8(sp)
; RV32IFD-NEXT: sw a1, 12(sp)
; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft1, ft0
-; RV32IFD-NEXT: feq.d a1, ft0, ft0
-; RV32IFD-NEXT: feq.d a2, ft1, ft1
-; RV32IFD-NEXT: and a1, a2, a1
-; RV32IFD-NEXT: xori a1, a1, 1
-; RV32IFD-NEXT: or a0, a0, a1
+; RV32IFD-NEXT: flt.d a0, ft1, ft0
+; RV32IFD-NEXT: flt.d a1, ft0, ft1
+; RV32IFD-NEXT: or a0, a1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: fmv.d.x ft0, a1
; RV64IFD-NEXT: fmv.d.x ft1, a0
-; RV64IFD-NEXT: feq.d a0, ft1, ft0
-; RV64IFD-NEXT: feq.d a1, ft0, ft0
-; RV64IFD-NEXT: feq.d a2, ft1, ft1
-; RV64IFD-NEXT: and a1, a2, a1
-; RV64IFD-NEXT: xori a1, a1, 1
-; RV64IFD-NEXT: or a0, a0, a1
+; RV64IFD-NEXT: flt.d a0, ft1, ft0
+; RV64IFD-NEXT: flt.d a1, ft0, ft1
+; RV64IFD-NEXT: or a0, a1, a0
+; RV64IFD-NEXT: xori a0, a0, 1
; RV64IFD-NEXT: ret
%1 = fcmp ueq double %a, %b
%2 = zext i1 %1 to i32
}
define double @select_fcmp_one(double %a, double %b) nounwind {
-; TODO: feq.s+sltiu+bne sequence could be optimised
; RV32IFD-LABEL: select_fcmp_one:
; RV32IFD: # %bb.0:
; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
; RV32IFD-NEXT: sw a2, 8(sp)
; RV32IFD-NEXT: sw a3, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft1, ft1
-; RV32IFD-NEXT: feq.d a1, ft0, ft0
-; RV32IFD-NEXT: and a0, a1, a0
-; RV32IFD-NEXT: feq.d a1, ft0, ft1
-; RV32IFD-NEXT: not a1, a1
-; RV32IFD-NEXT: and a0, a1, a0
+; RV32IFD-NEXT: flt.d a0, ft1, ft0
+; RV32IFD-NEXT: flt.d a1, ft0, ft1
+; RV32IFD-NEXT: or a0, a1, a0
; RV32IFD-NEXT: bnez a0, .LBB6_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fmv.d ft0, ft1
+; RV32IFD-NEXT: fmv.d ft1, ft0
; RV32IFD-NEXT: .LBB6_2:
-; RV32IFD-NEXT: fsd ft0, 8(sp)
+; RV32IFD-NEXT: fsd ft1, 8(sp)
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
; RV32IFD-NEXT: addi sp, sp, 16
;
; RV64IFD-LABEL: select_fcmp_one:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fmv.d.x ft0, a0
; RV64IFD-NEXT: fmv.d.x ft1, a1
-; RV64IFD-NEXT: feq.d a0, ft1, ft1
-; RV64IFD-NEXT: feq.d a1, ft0, ft0
-; RV64IFD-NEXT: and a0, a1, a0
-; RV64IFD-NEXT: feq.d a1, ft0, ft1
-; RV64IFD-NEXT: not a1, a1
-; RV64IFD-NEXT: and a0, a1, a0
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: flt.d a0, ft0, ft1
+; RV64IFD-NEXT: flt.d a1, ft1, ft0
+; RV64IFD-NEXT: or a0, a1, a0
; RV64IFD-NEXT: bnez a0, .LBB6_2
; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fmv.d ft0, ft1
; RV32IFD-NEXT: sw a0, 8(sp)
; RV32IFD-NEXT: sw a1, 12(sp)
; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft1, ft0
-; RV32IFD-NEXT: feq.d a1, ft0, ft0
-; RV32IFD-NEXT: feq.d a2, ft1, ft1
-; RV32IFD-NEXT: and a1, a2, a1
-; RV32IFD-NEXT: xori a1, a1, 1
-; RV32IFD-NEXT: or a0, a0, a1
+; RV32IFD-NEXT: flt.d a0, ft1, ft0
+; RV32IFD-NEXT: flt.d a1, ft0, ft1
+; RV32IFD-NEXT: or a0, a1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: bnez a0, .LBB8_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: fmv.d ft1, ft0
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: fmv.d.x ft1, a1
; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: feq.d a0, ft0, ft1
-; RV64IFD-NEXT: feq.d a1, ft1, ft1
-; RV64IFD-NEXT: feq.d a2, ft0, ft0
-; RV64IFD-NEXT: and a1, a2, a1
-; RV64IFD-NEXT: xori a1, a1, 1
-; RV64IFD-NEXT: or a0, a0, a1
+; RV64IFD-NEXT: flt.d a0, ft0, ft1
+; RV64IFD-NEXT: flt.d a1, ft1, ft0
+; RV64IFD-NEXT: or a0, a1, a0
+; RV64IFD-NEXT: xori a0, a0, 1
; RV64IFD-NEXT: bnez a0, .LBB8_2
; RV64IFD-NEXT: # %bb.1:
; RV64IFD-NEXT: fmv.d ft0, ft1
unreachable
}
-; TODO: feq.s+sltiu+bne -> feq.s+beq
define void @br_fcmp_one(float %a, float %b) nounwind {
; RV32IF-LABEL: br_fcmp_one:
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fmv.w.x ft0, a0
-; RV32IF-NEXT: fmv.w.x ft1, a1
-; RV32IF-NEXT: feq.s a0, ft1, ft1
-; RV32IF-NEXT: feq.s a1, ft0, ft0
-; RV32IF-NEXT: and a0, a1, a0
-; RV32IF-NEXT: feq.s a1, ft0, ft1
-; RV32IF-NEXT: not a1, a1
-; RV32IF-NEXT: and a0, a1, a0
+; RV32IF-NEXT: fmv.w.x ft0, a1
+; RV32IF-NEXT: fmv.w.x ft1, a0
+; RV32IF-NEXT: flt.s a0, ft1, ft0
+; RV32IF-NEXT: flt.s a1, ft0, ft1
+; RV32IF-NEXT: or a0, a1, a0
; RV32IF-NEXT: bnez a0, .LBB7_2
; RV32IF-NEXT: # %bb.1: # %if.else
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV64IF: # %bb.0:
; RV64IF-NEXT: addi sp, sp, -16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-NEXT: fmv.w.x ft0, a0
-; RV64IF-NEXT: fmv.w.x ft1, a1
-; RV64IF-NEXT: feq.s a0, ft1, ft1
-; RV64IF-NEXT: feq.s a1, ft0, ft0
-; RV64IF-NEXT: and a0, a1, a0
-; RV64IF-NEXT: feq.s a1, ft0, ft1
-; RV64IF-NEXT: not a1, a1
-; RV64IF-NEXT: and a0, a1, a0
+; RV64IF-NEXT: fmv.w.x ft0, a1
+; RV64IF-NEXT: fmv.w.x ft1, a0
+; RV64IF-NEXT: flt.s a0, ft1, ft0
+; RV64IF-NEXT: flt.s a1, ft0, ft1
+; RV64IF-NEXT: or a0, a1, a0
; RV64IF-NEXT: bnez a0, .LBB7_2
; RV64IF-NEXT: # %bb.1: # %if.else
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.w.x ft0, a1
; RV32IF-NEXT: fmv.w.x ft1, a0
-; RV32IF-NEXT: feq.s a0, ft1, ft0
-; RV32IF-NEXT: feq.s a1, ft0, ft0
-; RV32IF-NEXT: feq.s a2, ft1, ft1
-; RV32IF-NEXT: and a1, a2, a1
-; RV32IF-NEXT: xori a1, a1, 1
-; RV32IF-NEXT: or a0, a0, a1
-; RV32IF-NEXT: bnez a0, .LBB9_2
+; RV32IF-NEXT: flt.s a0, ft1, ft0
+; RV32IF-NEXT: flt.s a1, ft0, ft1
+; RV32IF-NEXT: or a0, a1, a0
+; RV32IF-NEXT: addi a1, zero, 1
+; RV32IF-NEXT: bne a0, a1, .LBB9_2
; RV32IF-NEXT: # %bb.1: # %if.else
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT: fmv.w.x ft0, a1
; RV64IF-NEXT: fmv.w.x ft1, a0
-; RV64IF-NEXT: feq.s a0, ft1, ft0
-; RV64IF-NEXT: feq.s a1, ft0, ft0
-; RV64IF-NEXT: feq.s a2, ft1, ft1
-; RV64IF-NEXT: and a1, a2, a1
-; RV64IF-NEXT: xori a1, a1, 1
-; RV64IF-NEXT: or a0, a0, a1
-; RV64IF-NEXT: bnez a0, .LBB9_2
+; RV64IF-NEXT: flt.s a0, ft1, ft0
+; RV64IF-NEXT: flt.s a1, ft0, ft1
+; RV64IF-NEXT: or a0, a1, a0
+; RV64IF-NEXT: addi a1, zero, 1
+; RV64IF-NEXT: bne a0, a1, .LBB9_2
; RV64IF-NEXT: # %bb.1: # %if.else
; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT: addi sp, sp, 16
define i32 @fcmp_one(float %a, float %b) nounwind {
; RV32IF-LABEL: fcmp_one:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: fmv.w.x ft0, a0
-; RV32IF-NEXT: fmv.w.x ft1, a1
-; RV32IF-NEXT: feq.s a0, ft1, ft1
-; RV32IF-NEXT: feq.s a1, ft0, ft0
-; RV32IF-NEXT: and a0, a1, a0
-; RV32IF-NEXT: feq.s a1, ft0, ft1
-; RV32IF-NEXT: not a1, a1
-; RV32IF-NEXT: and a0, a1, a0
+; RV32IF-NEXT: fmv.w.x ft0, a1
+; RV32IF-NEXT: fmv.w.x ft1, a0
+; RV32IF-NEXT: flt.s a0, ft1, ft0
+; RV32IF-NEXT: flt.s a1, ft0, ft1
+; RV32IF-NEXT: or a0, a1, a0
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcmp_one:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: fmv.w.x ft0, a0
-; RV64IF-NEXT: fmv.w.x ft1, a1
-; RV64IF-NEXT: feq.s a0, ft1, ft1
-; RV64IF-NEXT: feq.s a1, ft0, ft0
-; RV64IF-NEXT: and a0, a1, a0
-; RV64IF-NEXT: feq.s a1, ft0, ft1
-; RV64IF-NEXT: not a1, a1
-; RV64IF-NEXT: and a0, a1, a0
+; RV64IF-NEXT: fmv.w.x ft0, a1
+; RV64IF-NEXT: fmv.w.x ft1, a0
+; RV64IF-NEXT: flt.s a0, ft1, ft0
+; RV64IF-NEXT: flt.s a1, ft0, ft1
+; RV64IF-NEXT: or a0, a1, a0
; RV64IF-NEXT: ret
%1 = fcmp one float %a, %b
%2 = zext i1 %1 to i32
; RV32IF: # %bb.0:
; RV32IF-NEXT: fmv.w.x ft0, a1
; RV32IF-NEXT: fmv.w.x ft1, a0
-; RV32IF-NEXT: feq.s a0, ft1, ft0
-; RV32IF-NEXT: feq.s a1, ft0, ft0
-; RV32IF-NEXT: feq.s a2, ft1, ft1
-; RV32IF-NEXT: and a1, a2, a1
-; RV32IF-NEXT: xori a1, a1, 1
-; RV32IF-NEXT: or a0, a0, a1
+; RV32IF-NEXT: flt.s a0, ft1, ft0
+; RV32IF-NEXT: flt.s a1, ft0, ft1
+; RV32IF-NEXT: or a0, a1, a0
+; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcmp_ueq:
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft0, a1
; RV64IF-NEXT: fmv.w.x ft1, a0
-; RV64IF-NEXT: feq.s a0, ft1, ft0
-; RV64IF-NEXT: feq.s a1, ft0, ft0
-; RV64IF-NEXT: feq.s a2, ft1, ft1
-; RV64IF-NEXT: and a1, a2, a1
-; RV64IF-NEXT: xori a1, a1, 1
-; RV64IF-NEXT: or a0, a0, a1
+; RV64IF-NEXT: flt.s a0, ft1, ft0
+; RV64IF-NEXT: flt.s a1, ft0, ft1
+; RV64IF-NEXT: or a0, a1, a0
+; RV64IF-NEXT: xori a0, a0, 1
; RV64IF-NEXT: ret
%1 = fcmp ueq float %a, %b
%2 = zext i1 %1 to i32
}
define float @select_fcmp_one(float %a, float %b) nounwind {
-; TODO: feq.s+sltiu+bne sequence could be optimised
; RV32IF-LABEL: select_fcmp_one:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: fmv.w.x ft0, a0
; RV32IF-NEXT: fmv.w.x ft1, a1
-; RV32IF-NEXT: feq.s a0, ft1, ft1
-; RV32IF-NEXT: feq.s a1, ft0, ft0
-; RV32IF-NEXT: and a0, a1, a0
-; RV32IF-NEXT: feq.s a1, ft0, ft1
-; RV32IF-NEXT: not a1, a1
-; RV32IF-NEXT: and a0, a1, a0
+; RV32IF-NEXT: fmv.w.x ft0, a0
+; RV32IF-NEXT: flt.s a0, ft0, ft1
+; RV32IF-NEXT: flt.s a1, ft1, ft0
+; RV32IF-NEXT: or a0, a1, a0
; RV32IF-NEXT: bnez a0, .LBB6_2
; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: fmv.s ft0, ft1
;
; RV64IF-LABEL: select_fcmp_one:
; RV64IF: # %bb.0:
-; RV64IF-NEXT: fmv.w.x ft0, a0
; RV64IF-NEXT: fmv.w.x ft1, a1
-; RV64IF-NEXT: feq.s a0, ft1, ft1
-; RV64IF-NEXT: feq.s a1, ft0, ft0
-; RV64IF-NEXT: and a0, a1, a0
-; RV64IF-NEXT: feq.s a1, ft0, ft1
-; RV64IF-NEXT: not a1, a1
-; RV64IF-NEXT: and a0, a1, a0
+; RV64IF-NEXT: fmv.w.x ft0, a0
+; RV64IF-NEXT: flt.s a0, ft0, ft1
+; RV64IF-NEXT: flt.s a1, ft1, ft0
+; RV64IF-NEXT: or a0, a1, a0
; RV64IF-NEXT: bnez a0, .LBB6_2
; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fmv.s ft0, ft1
; RV32IF: # %bb.0:
; RV32IF-NEXT: fmv.w.x ft1, a1
; RV32IF-NEXT: fmv.w.x ft0, a0
-; RV32IF-NEXT: feq.s a0, ft0, ft1
-; RV32IF-NEXT: feq.s a1, ft1, ft1
-; RV32IF-NEXT: feq.s a2, ft0, ft0
-; RV32IF-NEXT: and a1, a2, a1
-; RV32IF-NEXT: xori a1, a1, 1
-; RV32IF-NEXT: or a0, a0, a1
+; RV32IF-NEXT: flt.s a0, ft0, ft1
+; RV32IF-NEXT: flt.s a1, ft1, ft0
+; RV32IF-NEXT: or a0, a1, a0
+; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: bnez a0, .LBB8_2
; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: fmv.s ft0, ft1
; RV64IF: # %bb.0:
; RV64IF-NEXT: fmv.w.x ft1, a1
; RV64IF-NEXT: fmv.w.x ft0, a0
-; RV64IF-NEXT: feq.s a0, ft0, ft1
-; RV64IF-NEXT: feq.s a1, ft1, ft1
-; RV64IF-NEXT: feq.s a2, ft0, ft0
-; RV64IF-NEXT: and a1, a2, a1
-; RV64IF-NEXT: xori a1, a1, 1
-; RV64IF-NEXT: or a0, a0, a1
+; RV64IF-NEXT: flt.s a0, ft0, ft1
+; RV64IF-NEXT: flt.s a1, ft1, ft0
+; RV64IF-NEXT: or a0, a1, a0
+; RV64IF-NEXT: xori a0, a0, 1
; RV64IF-NEXT: bnez a0, .LBB8_2
; RV64IF-NEXT: # %bb.1:
; RV64IF-NEXT: fmv.s ft0, ft1
unreachable
}
-; TODO: feq.h+sltiu+bne -> feq.h+beq
define void @br_fcmp_one(half %a, half %b) nounwind {
; RV32IZFH-LABEL: br_fcmp_one:
; RV32IZFH: # %bb.0:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: feq.h a0, fa1, fa1
-; RV32IZFH-NEXT: feq.h a1, fa0, fa0
-; RV32IZFH-NEXT: and a0, a1, a0
-; RV32IZFH-NEXT: feq.h a1, fa0, fa1
-; RV32IZFH-NEXT: not a1, a1
-; RV32IZFH-NEXT: and a0, a1, a0
+; RV32IZFH-NEXT: flt.h a0, fa0, fa1
+; RV32IZFH-NEXT: flt.h a1, fa1, fa0
+; RV32IZFH-NEXT: or a0, a1, a0
; RV32IZFH-NEXT: bnez a0, .LBB7_2
; RV32IZFH-NEXT: # %bb.1: # %if.else
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: addi sp, sp, -16
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFH-NEXT: feq.h a0, fa1, fa1
-; RV64IZFH-NEXT: feq.h a1, fa0, fa0
-; RV64IZFH-NEXT: and a0, a1, a0
-; RV64IZFH-NEXT: feq.h a1, fa0, fa1
-; RV64IZFH-NEXT: not a1, a1
-; RV64IZFH-NEXT: and a0, a1, a0
+; RV64IZFH-NEXT: flt.h a0, fa0, fa1
+; RV64IZFH-NEXT: flt.h a1, fa1, fa0
+; RV64IZFH-NEXT: or a0, a1, a0
; RV64IZFH-NEXT: bnez a0, .LBB7_2
; RV64IZFH-NEXT: # %bb.1: # %if.else
; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV32IZFH: # %bb.0:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: feq.h a0, fa0, fa1
-; RV32IZFH-NEXT: feq.h a1, fa1, fa1
-; RV32IZFH-NEXT: feq.h a2, fa0, fa0
-; RV32IZFH-NEXT: and a1, a2, a1
-; RV32IZFH-NEXT: xori a1, a1, 1
-; RV32IZFH-NEXT: or a0, a0, a1
-; RV32IZFH-NEXT: bnez a0, .LBB9_2
+; RV32IZFH-NEXT: flt.h a0, fa0, fa1
+; RV32IZFH-NEXT: flt.h a1, fa1, fa0
+; RV32IZFH-NEXT: or a0, a1, a0
+; RV32IZFH-NEXT: addi a1, zero, 1
+; RV32IZFH-NEXT: bne a0, a1, .LBB9_2
; RV32IZFH-NEXT: # %bb.1: # %if.else
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV64IZFH: # %bb.0:
; RV64IZFH-NEXT: addi sp, sp, -16
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFH-NEXT: feq.h a0, fa0, fa1
-; RV64IZFH-NEXT: feq.h a1, fa1, fa1
-; RV64IZFH-NEXT: feq.h a2, fa0, fa0
-; RV64IZFH-NEXT: and a1, a2, a1
-; RV64IZFH-NEXT: xori a1, a1, 1
-; RV64IZFH-NEXT: or a0, a0, a1
-; RV64IZFH-NEXT: bnez a0, .LBB9_2
+; RV64IZFH-NEXT: flt.h a0, fa0, fa1
+; RV64IZFH-NEXT: flt.h a1, fa1, fa0
+; RV64IZFH-NEXT: or a0, a1, a0
+; RV64IZFH-NEXT: addi a1, zero, 1
+; RV64IZFH-NEXT: bne a0, a1, .LBB9_2
; RV64IZFH-NEXT: # %bb.1: # %if.else
; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFH-NEXT: addi sp, sp, 16
define i32 @fcmp_one(half %a, half %b) nounwind {
; RV32IZFH-LABEL: fcmp_one:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: feq.h a0, fa1, fa1
-; RV32IZFH-NEXT: feq.h a1, fa0, fa0
-; RV32IZFH-NEXT: and a0, a1, a0
-; RV32IZFH-NEXT: feq.h a1, fa0, fa1
-; RV32IZFH-NEXT: not a1, a1
-; RV32IZFH-NEXT: and a0, a1, a0
+; RV32IZFH-NEXT: flt.h a0, fa0, fa1
+; RV32IZFH-NEXT: flt.h a1, fa1, fa0
+; RV32IZFH-NEXT: or a0, a1, a0
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcmp_one:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa1, fa1
-; RV64IZFH-NEXT: feq.h a1, fa0, fa0
-; RV64IZFH-NEXT: and a0, a1, a0
-; RV64IZFH-NEXT: feq.h a1, fa0, fa1
-; RV64IZFH-NEXT: not a1, a1
-; RV64IZFH-NEXT: and a0, a1, a0
+; RV64IZFH-NEXT: flt.h a0, fa0, fa1
+; RV64IZFH-NEXT: flt.h a1, fa1, fa0
+; RV64IZFH-NEXT: or a0, a1, a0
; RV64IZFH-NEXT: ret
%1 = fcmp one half %a, %b
%2 = zext i1 %1 to i32
define i32 @fcmp_ueq(half %a, half %b) nounwind {
; RV32IZFH-LABEL: fcmp_ueq:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: feq.h a0, fa0, fa1
-; RV32IZFH-NEXT: feq.h a1, fa1, fa1
-; RV32IZFH-NEXT: feq.h a2, fa0, fa0
-; RV32IZFH-NEXT: and a1, a2, a1
-; RV32IZFH-NEXT: xori a1, a1, 1
-; RV32IZFH-NEXT: or a0, a0, a1
+; RV32IZFH-NEXT: flt.h a0, fa0, fa1
+; RV32IZFH-NEXT: flt.h a1, fa1, fa0
+; RV32IZFH-NEXT: or a0, a1, a0
+; RV32IZFH-NEXT: xori a0, a0, 1
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcmp_ueq:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa1
-; RV64IZFH-NEXT: feq.h a1, fa1, fa1
-; RV64IZFH-NEXT: feq.h a2, fa0, fa0
-; RV64IZFH-NEXT: and a1, a2, a1
-; RV64IZFH-NEXT: xori a1, a1, 1
-; RV64IZFH-NEXT: or a0, a0, a1
+; RV64IZFH-NEXT: flt.h a0, fa0, fa1
+; RV64IZFH-NEXT: flt.h a1, fa1, fa0
+; RV64IZFH-NEXT: or a0, a1, a0
+; RV64IZFH-NEXT: xori a0, a0, 1
; RV64IZFH-NEXT: ret
%1 = fcmp ueq half %a, %b
%2 = zext i1 %1 to i32
}
define half @select_fcmp_one(half %a, half %b) nounwind {
-; TODO: feq.h+sltiu+bne sequence could be optimised
; RV32IZFH-LABEL: select_fcmp_one:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: feq.h a0, fa1, fa1
-; RV32IZFH-NEXT: feq.h a1, fa0, fa0
-; RV32IZFH-NEXT: and a0, a1, a0
-; RV32IZFH-NEXT: feq.h a1, fa0, fa1
-; RV32IZFH-NEXT: not a1, a1
-; RV32IZFH-NEXT: and a0, a1, a0
+; RV32IZFH-NEXT: flt.h a0, fa0, fa1
+; RV32IZFH-NEXT: flt.h a1, fa1, fa0
+; RV32IZFH-NEXT: or a0, a1, a0
; RV32IZFH-NEXT: bnez a0, .LBB6_2
; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: fmv.h fa0, fa1
;
; RV64IZFH-LABEL: select_fcmp_one:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa1, fa1
-; RV64IZFH-NEXT: feq.h a1, fa0, fa0
-; RV64IZFH-NEXT: and a0, a1, a0
-; RV64IZFH-NEXT: feq.h a1, fa0, fa1
-; RV64IZFH-NEXT: not a1, a1
-; RV64IZFH-NEXT: and a0, a1, a0
+; RV64IZFH-NEXT: flt.h a0, fa0, fa1
+; RV64IZFH-NEXT: flt.h a1, fa1, fa0
+; RV64IZFH-NEXT: or a0, a1, a0
; RV64IZFH-NEXT: bnez a0, .LBB6_2
; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fmv.h fa0, fa1
define half @select_fcmp_ueq(half %a, half %b) nounwind {
; RV32IZFH-LABEL: select_fcmp_ueq:
; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: feq.h a0, fa0, fa1
-; RV32IZFH-NEXT: feq.h a1, fa1, fa1
-; RV32IZFH-NEXT: feq.h a2, fa0, fa0
-; RV32IZFH-NEXT: and a1, a2, a1
-; RV32IZFH-NEXT: xori a1, a1, 1
-; RV32IZFH-NEXT: or a0, a0, a1
+; RV32IZFH-NEXT: flt.h a0, fa0, fa1
+; RV32IZFH-NEXT: flt.h a1, fa1, fa0
+; RV32IZFH-NEXT: or a0, a1, a0
+; RV32IZFH-NEXT: xori a0, a0, 1
; RV32IZFH-NEXT: bnez a0, .LBB8_2
; RV32IZFH-NEXT: # %bb.1:
; RV32IZFH-NEXT: fmv.h fa0, fa1
;
; RV64IZFH-LABEL: select_fcmp_ueq:
; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: feq.h a0, fa0, fa1
-; RV64IZFH-NEXT: feq.h a1, fa1, fa1
-; RV64IZFH-NEXT: feq.h a2, fa0, fa0
-; RV64IZFH-NEXT: and a1, a2, a1
-; RV64IZFH-NEXT: xori a1, a1, 1
-; RV64IZFH-NEXT: or a0, a0, a1
+; RV64IZFH-NEXT: flt.h a0, fa0, fa1
+; RV64IZFH-NEXT: flt.h a1, fa1, fa0
+; RV64IZFH-NEXT: or a0, a1, a0
+; RV64IZFH-NEXT: xori a0, a0, 1
; RV64IZFH-NEXT: bnez a0, .LBB8_2
; RV64IZFH-NEXT: # %bb.1:
; RV64IZFH-NEXT: fmv.h fa0, fa1
; CHECK-LABEL: fcmp_one_vv_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vmfne.vv v25, v16, v18
-; CHECK-NEXT: vmfeq.vv v26, v18, v18
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v18
+; CHECK-NEXT: vmflt.vv v26, v18, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp one <vscale x 8 x half> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v28, v26, fa0
-; CHECK-NEXT: vmfeq.vv v26, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v28
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v28, v26, fa0
-; CHECK-NEXT: vmfeq.vv v26, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v28, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_vv_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vmfeq.vv v25, v16, v18
-; CHECK-NEXT: vmfne.vv v26, v18, v18
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v18
+; CHECK-NEXT: vmflt.vv v26, v18, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp ueq <vscale x 8 x half> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v28, v26, fa0
-; CHECK-NEXT: vmfne.vv v26, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v28
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v28, v26, fa0
-; CHECK-NEXT: vmfne.vv v26, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v28, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vmfne.vv v25, v16, v20
-; CHECK-NEXT: vmfeq.vv v26, v20, v20
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v20
+; CHECK-NEXT: vmflt.vv v26, v20, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp one <vscale x 8 x float> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_one_vf_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v28, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_fv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v28, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v27
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vmfeq.vv v25, v16, v20
-; CHECK-NEXT: vmfne.vv v26, v20, v20
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v20
+; CHECK-NEXT: vmflt.vv v26, v20, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp ueq <vscale x 8 x float> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_ueq_vf_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v28, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_fv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v28, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v27
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmfne.vv v25, v16, v8
-; CHECK-NEXT: vmfeq.vv v26, v16, v16
-; CHECK-NEXT: vmfeq.vv v27, v8, v8
+; CHECK-NEXT: vmflt.vv v25, v16, v8
+; CHECK-NEXT: vmflt.vv v26, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v27
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp one <vscale x 8 x double> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_one_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v8, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_fv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v8, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v27
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmfeq.vv v25, v16, v8
-; CHECK-NEXT: vmfne.vv v26, v16, v16
-; CHECK-NEXT: vmfne.vv v27, v8, v8
+; CHECK-NEXT: vmflt.vv v25, v16, v8
+; CHECK-NEXT: vmflt.vv v26, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v27
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp ueq <vscale x 8 x double> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_ueq_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v8, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_fv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v8, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v27
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_vv_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vmfne.vv v25, v16, v18
-; CHECK-NEXT: vmfeq.vv v26, v18, v18
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v18
+; CHECK-NEXT: vmflt.vv v26, v18, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp one <vscale x 8 x half> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v28, v26, fa0
-; CHECK-NEXT: vmfeq.vv v26, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v28
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v28, v26, fa0
-; CHECK-NEXT: vmfeq.vv v26, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v28, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_vv_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vmfeq.vv v25, v16, v18
-; CHECK-NEXT: vmfne.vv v26, v18, v18
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v18
+; CHECK-NEXT: vmflt.vv v26, v18, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp ueq <vscale x 8 x half> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v28, v26, fa0
-; CHECK-NEXT: vmfne.vv v26, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v28
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v28, v26, fa0
-; CHECK-NEXT: vmfne.vv v26, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v28, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x half> undef, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vmfne.vv v25, v16, v20
-; CHECK-NEXT: vmfeq.vv v26, v20, v20
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v20
+; CHECK-NEXT: vmflt.vv v26, v20, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp one <vscale x 8 x float> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_one_vf_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v28, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_fv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v28, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v27
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vmfeq.vv v25, v16, v20
-; CHECK-NEXT: vmfne.vv v26, v20, v20
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vv v25, v16, v20
+; CHECK-NEXT: vmflt.vv v26, v20, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp ueq <vscale x 8 x float> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_ueq_vf_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v28, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_fv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT: vfmv.v.f v28, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v28, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v27
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x float> undef, float %b, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmfne.vv v25, v16, v8
-; CHECK-NEXT: vmfeq.vv v26, v16, v16
-; CHECK-NEXT: vmfeq.vv v27, v8, v8
+; CHECK-NEXT: vmflt.vv v25, v16, v8
+; CHECK-NEXT: vmflt.vv v26, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v27
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp one <vscale x 8 x double> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_one_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v8, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v27, v26
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_one_fv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfne.vf v25, v16, fa0
-; CHECK-NEXT: vmfeq.vf v26, v8, fa0
-; CHECK-NEXT: vmfeq.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v26, v26, v27
-; CHECK-NEXT: vmand.mm v0, v25, v26
+; CHECK-NEXT: vmor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmfeq.vv v25, v16, v8
-; CHECK-NEXT: vmfne.vv v26, v16, v16
-; CHECK-NEXT: vmfne.vv v27, v8, v8
+; CHECK-NEXT: vmflt.vv v25, v16, v8
+; CHECK-NEXT: vmflt.vv v26, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v27
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%vc = fcmp ueq <vscale x 8 x double> %va, %vb
ret <vscale x 8 x i1> %vc
; CHECK-LABEL: fcmp_ueq_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v8, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmflt.vf v25, v16, fa0
+; CHECK-NEXT: vmfgt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v27, v26
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-LABEL: fcmp_ueq_fv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vmfeq.vf v25, v16, fa0
-; CHECK-NEXT: vmfne.vf v26, v8, fa0
-; CHECK-NEXT: vmfne.vv v27, v16, v16
+; CHECK-NEXT: vmfgt.vf v25, v16, fa0
+; CHECK-NEXT: vmflt.vf v26, v16, fa0
; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v26, v26, v27
-; CHECK-NEXT: vmor.mm v0, v25, v26
+; CHECK-NEXT: vmnor.mm v0, v26, v25
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x double> undef, double %b, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: .functype ueq_f32 (f32, f32) -> (i32){{$}}
; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f32.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
-; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
define i32 @ueq_f32(float %x, float %y) {
%a = fcmp ueq float %x, %y
%b = zext i1 %a to i32
; CHECK-NEXT: .functype one_f32 (f32, f32) -> (i32){{$}}
; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f32.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f32.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f32.eq $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM4]]
define i32 @one_f32(float %x, float %y) {
%a = fcmp one float %x, %y
; CHECK-NEXT: .functype ueq_f64 (f64, f64) -> (i32){{$}}
; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
-; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
define i32 @ueq_f64(double %x, double %y) {
%a = fcmp ueq double %x, %y
%b = zext i1 %a to i32
; CHECK-NEXT: .functype one_f64 (f64, f64) -> (i32){{$}}
; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.eq $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM4]]
define i32 @one_f64(double %x, double %y) {
%a = fcmp one double %x, %y
; CHECK-LABEL: compare_one_v4f32:
; NO-SIMD128-NOT: f32x4
; SIMD128-NEXT: .functype compare_one_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i1> @compare_one_v4f32 (<4 x float> %x, <4 x float> %y) {
%res = fcmp one <4 x float> %x, %y
; CHECK-LABEL: compare_sext_one_v4f32:
; NO-SIMD128-NOT: f32x4
; SIMD128-NEXT: .functype compare_sext_one_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @compare_sext_one_v4f32 (<4 x float> %x, <4 x float> %y) {
%cmp = fcmp one <4 x float> %x, %y
; CHECK-LABEL: compare_ueq_v4f32:
; NO-SIMD128-NOT: f32x4
; SIMD128-NEXT: .functype compare_ueq_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i1> @compare_ueq_v4f32 (<4 x float> %x, <4 x float> %y) {
%res = fcmp ueq <4 x float> %x, %y
; CHECK-LABEL: compare_sext_ueq_v4f32:
; NO-SIMD128-NOT: f32x4
; SIMD128-NEXT: .functype compare_sext_ueq_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]
define <4 x i32> @compare_sext_ueq_v4f32 (<4 x float> %x, <4 x float> %y) {
%cmp = fcmp ueq <4 x float> %x, %y
; CHECK-LABEL: compare_one_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype compare_one_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i1> @compare_one_v2f64 (<2 x double> %x, <2 x double> %y) {
%res = fcmp one <2 x double> %x, %y
; CHECK-LABEL: compare_sext_one_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype compare_sext_one_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @compare_sext_one_v2f64 (<2 x double> %x, <2 x double> %y) {
%cmp = fcmp one <2 x double> %x, %y
; CHECK-LABEL: compare_ueq_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype compare_ueq_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i1> @compare_ueq_v2f64 (<2 x double> %x, <2 x double> %y) {
%res = fcmp ueq <2 x double> %x, %y
; CHECK-LABEL: compare_sext_ueq_v2f64:
; NO-SIMD128-NOT: f64x2
; SIMD128-NEXT: .functype compare_sext_ueq_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @compare_sext_ueq_v2f64 (<2 x double> %x, <2 x double> %y) {
%cmp = fcmp ueq <2 x double> %x, %y