let Inst{22} = Dd{4};
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
// Special case encoding: bits 11-8 is 0b1011.
let Inst{4} = 0;
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
// Between half, single and double-precision.
+let hasSideEffects = 0 in
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* Intentionally left blank, see patterns below */]>,
def : FP16Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+let hasSideEffects = 0 in
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* Intentionally left blank, see patterns below */]>,
(v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
+let hasSideEffects = 0 in
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* Intentionally left blank, see patterns below */]>,
(v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
(SSubReg_f16_reg imm_odd:$lane)))>;
+let hasSideEffects = 0 in
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* Intentionally left blank, see patterns below */]>,
// Encode instruction operands.
let Inst{3-0} = Sm{4-1};
let Inst{5} = Sm{0};
+
+ let hasSideEffects = 0;
}
def : FullFP16Pat<(f64 (fpextend (f16 HPR:$Sm))),
let Inst{5} = Dm{4};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let hasSideEffects = 0;
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
// Encode instruction operands.
let Inst{3-0} = Sm{4-1};
let Inst{5} = Sm{0};
+
+ let hasSideEffects = 0;
}
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
let Inst{22} = Sd{0};
let Inst{3-0} = Dm{3-0};
let Inst{5} = Dm{4};
+
+ let hasSideEffects = 0;
}
multiclass vcvt_inst<string opc, bits<2> rm,
SDPatternOperator node = null_frag> {
- let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8", hasSideEffects = 0 in {
def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins HPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"),
let Inst{22} = Dd{4};
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let hasSideEffects = 0;
}
class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
let Predicates = [HasFullFP16];
+ let hasSideEffects = 0;
}
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
let Inst{22} = Sd{0};
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
let Predicates = [HasFullFP16];
+ let hasSideEffects = 0;
}
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
let Inst{15-12} = dst{4-1};
+
+ let hasSideEffects = 0;
}
// Double Precision register
let Inst{22} = dst{4};
let Inst{15-12} = dst{3-0};
+ let hasSideEffects = 0;
let Predicates = [HasVFP2, HasDPVFP];
}
let Inst{11-8} = 0b1001;
let Inst{7-6} = op7_6;
let Inst{4} = 0;
+
let DecoderNamespace = "VFPV8";
+ let hasSideEffects = 0;
}
def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>;
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-THUMB,CHECK
-; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-ARM,CHECK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-THUMB
+; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-ARM
define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_ne:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_eq:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s2, r1
-; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_gt:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s2, r1
-; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_ge:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s2, r1
-; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_lt:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-LABEL: test_le:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: cmp r2, r3
; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 r0, s0
; CHECK-NEXT: bx lr
define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_hi:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r0
-; CHECK-THUMB-NEXT: cmp r2, r3
; CHECK-THUMB-NEXT: vmov s0, r1
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it hi
; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2
; CHECK-THUMB-NEXT: vmov.f16 r0, s0
;
; CHECK-ARM-LABEL: test_hi:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s2, r0
-; CHECK-ARM-NEXT: cmp r2, r3
; CHECK-ARM-NEXT: vmov s0, r1
-; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
; CHECK-ARM-NEXT: vmovhi.f32 s0, s2
; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_hs:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r0
-; CHECK-THUMB-NEXT: cmp r2, r3
; CHECK-THUMB-NEXT: vmov s0, r1
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it hs
; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2
; CHECK-THUMB-NEXT: vmov.f16 r0, s0
;
; CHECK-ARM-LABEL: test_hs:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s2, r0
-; CHECK-ARM-NEXT: cmp r2, r3
; CHECK-ARM-NEXT: vmov s0, r1
-; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
; CHECK-ARM-NEXT: vmovhs.f32 s0, s2
; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_lo:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r0
-; CHECK-THUMB-NEXT: cmp r2, r3
; CHECK-THUMB-NEXT: vmov s0, r1
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it lo
; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2
; CHECK-THUMB-NEXT: vmov.f16 r0, s0
;
; CHECK-ARM-LABEL: test_lo:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s2, r0
-; CHECK-ARM-NEXT: cmp r2, r3
; CHECK-ARM-NEXT: vmov s0, r1
-; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
; CHECK-ARM-NEXT: vmovlo.f32 s0, s2
; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) {
; CHECK-THUMB-LABEL: test_ls:
; CHECK-THUMB: @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT: vmov s2, r0
-; CHECK-THUMB-NEXT: cmp r2, r3
; CHECK-THUMB-NEXT: vmov s0, r1
-; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s2, r0
; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
; CHECK-THUMB-NEXT: it ls
; CHECK-THUMB-NEXT: vmovls.f32 s0, s2
; CHECK-THUMB-NEXT: vmov.f16 r0, s0
;
; CHECK-ARM-LABEL: test_ls:
; CHECK-ARM: @ %bb.0: @ %entry
-; CHECK-ARM-NEXT: vmov s2, r0
-; CHECK-ARM-NEXT: cmp r2, r3
; CHECK-ARM-NEXT: vmov s0, r1
-; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s2, r0
; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
; CHECK-ARM-NEXT: vmovls.f32 s0, s2
; CHECK-ARM-NEXT: vmov.f16 r0, s0
; CHECK-ARM-NEXT: bx lr
define half @foo(half %a, half %b) {
; SOFT-LABEL: foo:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: vmov s2, r1
; SOFT-NEXT: vmov s0, r0
-; SOFT-NEXT: vcvtb.f32.f16 s2, s2
+; SOFT-NEXT: vmov s2, r1
; SOFT-NEXT: vcvtb.f32.f16 s0, s0
+; SOFT-NEXT: vcvtb.f32.f16 s2, s2
; SOFT-NEXT: vadd.f32 s0, s0, s2
; SOFT-NEXT: vcvtb.f16.f32 s0, s0
; SOFT-NEXT: vmov r0, s0
define half @addf16(half %a, half %b) {
; CHECK-VFPV4-SOFT-LABEL: addf16:
; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
-; CHECK-VFPV4-SOFT-NEXT: vmov s2, r1
; CHECK-VFPV4-SOFT-NEXT: vmov s0, r0
-; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-VFPV4-SOFT-NEXT: vmov s2, r1
; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s0, s2
; CHECK-VFPV4-SOFT-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
; CHECK-LABEL: test_frem:
; CHECK: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s2
; CHECK-LABEL: test_pow:
; CHECK: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: vcvtb.f32.f16 s1, s2
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
-; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
-; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
+; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
-; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
-; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
+; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
-; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
-; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
+; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
-; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
-; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
-; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
+; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0
; CHECK-LABEL: eggs:
; CHECK: sub sp, #8
; VMRS instruction comes before any other instruction writing FPSCR:
-; CHECK-NEXT: vmrs r0, fpscr
+; CHECK-NOT: vcmp
+; CHECK: vmrs {{r[0-9]}}, fpscr
; CHECK: vcmp
; ...
; CHECK: add sp, #8
; CHECK: bx lr
; CHECK-NEXT: vcvt.s32.f64 s0, d18
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vadd.f64 d20, d16, d16
-; CHECK-NEXT: vadd.f64 d19, d19, d19
; CHECK-NEXT: vadd.f64 d16, d17, d17
; CHECK-NEXT: vcvt.s32.f64 s2, d20
-; CHECK-NEXT: vcvt.s32.f64 s4, d19
; CHECK-NEXT: vcvt.s32.f64 s6, d16
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vadd.f64 d19, d19, d19
+; CHECK-NEXT: vcvt.s32.f64 s4, d19
; CHECK-NEXT: vmov.32 d17[0], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vcvt.s32.f64 s0, d16
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vstr s0, [r2]
-; CHECK-NEXT: vcvt.s32.f64 s0, d16
-; CHECK-NEXT: vcvt.s32.f64 s2, d16
-; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vstr s0, [r3]
; CHECK-NEXT: mov pc, lr
%conv = fptosi double %c to i32
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vcvt.u32.f64 s0, d16
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vstr s0, [r2]
-; CHECK-NEXT: vcvt.u32.f64 s0, d16
-; CHECK-NEXT: vcvt.u32.f64 s2, d16
-; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vstr s0, [r3]
; CHECK-NEXT: mov pc, lr
%conv = fptoui double %c to i32
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r4, [r6], #4
; CHECK-NEXT: add.w r12, r12, #1
-; CHECK-NEXT: vmov s0, r4
-; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vldr s2, [r5]
; CHECK-NEXT: adds r5, #4
+; CHECK-NEXT: vmov s0, r4
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r7]
; CHECK-NEXT: adds r7, #4
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6]
; CHECK-NEXT: vldr s0, [r1, #-4]
-; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vldr s2, [r7, #4]
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #4]
; CHECK-NEXT: vldr s0, [r1]
-; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vldr s2, [r7, #8]
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #8]
; CHECK-NEXT: vldr s0, [r1, #4]
; CHECK-NEXT: add.w r1, r1, #16
-; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vldr s2, [r7, #12]
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #12]
; CHECK-NEXT: bne .LBB3_12
; CHECK-NEXT: vmul.f16 s2, s4, s2
; CHECK-NEXT: vldr.16 s4, [r2, #4]
; CHECK-NEXT: vldr.16 s10, [r4]
-; CHECK-NEXT: adds r3, #8
+; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vmul.f16 s4, s6, s4
; CHECK-NEXT: vldr.16 s6, [r2, #2]
-; CHECK-NEXT: add.w r12, r12, #4
+; CHECK-NEXT: vcvtb.f32.f16 s4, s4
+; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: vmul.f16 s6, s8, s6
; CHECK-NEXT: vldr.16 s8, [r2]
+; CHECK-NEXT: vcvtb.f32.f16 s6, s6
+; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vmul.f16 s8, s10, s8
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
-; CHECK-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-NEXT: vadd.f32 s0, s0, s8
-; CHECK-NEXT: vcvtb.f32.f16 s4, s4
-; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: vadd.f16 s2, s4, s2
; CHECK-NEXT: vldr.16 s4, [r2, #4]
; CHECK-NEXT: vldr.16 s10, [r4]
-; CHECK-NEXT: adds r3, #8
+; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vadd.f16 s4, s6, s4
; CHECK-NEXT: vldr.16 s6, [r2, #2]
-; CHECK-NEXT: add.w r12, r12, #4
+; CHECK-NEXT: vcvtb.f32.f16 s4, s4
+; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: vadd.f16 s6, s8, s6
; CHECK-NEXT: vldr.16 s8, [r2]
+; CHECK-NEXT: vcvtb.f32.f16 s6, s6
+; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vadd.f16 s8, s10, s8
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
-; CHECK-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-NEXT: vadd.f32 s0, s0, s8
-; CHECK-NEXT: vcvtb.f32.f16 s4, s4
-; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-NEXT: vldr.16 s2, [r2, #2]
+; CHECK-NEXT: ldrsh r5, [r3, #-2]
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vmov s4, r4
-; CHECK-NEXT: vcvt.f16.s32 s4, s4
; CHECK-NEXT: ldrsh.w r4, [r3]
+; CHECK-NEXT: vcvt.f16.s32 s4, s4
+; CHECK-NEXT: vmov s8, r5
; CHECK-NEXT: vmul.f16 s2, s2, s4
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vmov s6, r4
-; CHECK-NEXT: vcvt.f16.s32 s6, s6
-; CHECK-NEXT: ldrsh r5, [r3, #-2]
; CHECK-NEXT: ldrsh r4, [r3, #-4]
+; CHECK-NEXT: vcvt.f16.s32 s6, s6
+; CHECK-NEXT: vcvt.f16.s32 s8, s8
; CHECK-NEXT: vmul.f16 s4, s4, s6
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
-; CHECK-NEXT: adds r3, #8
-; CHECK-NEXT: vmov s8, r5
-; CHECK-NEXT: vcvt.f16.s32 s8, s8
; CHECK-NEXT: vmov s10, r4
+; CHECK-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-NEXT: vmul.f16 s6, s6, s8
; CHECK-NEXT: vldr.16 s8, [r2, #-4]
; CHECK-NEXT: vcvt.f16.s32 s10, s10
-; CHECK-NEXT: adds r2, #8
+; CHECK-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-NEXT: vmul.f16 s8, s8, s10
+; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
-; CHECK-NEXT: vcvtb.f32.f16 s6, s6
+; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: vadd.f32 s0, s0, s8
-; CHECK-NEXT: vcvtb.f32.f16 s4, s4
-; CHECK-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-NEXT: adds r2, #8
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: vcmp.f32 s0, #0
-; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
+; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
-; CHECK-LE-NEXT: mov.w r1, #0
-; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r1, #1
; CHECK-LE-NEXT: cmp r1, #0
; CHECK-LE-NEXT: cset r2, ne
; CHECK-LE-NEXT: and r2, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
+; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: bne .LBB25_5
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: vcmp.f32 s4, #0
-; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
-; CHECK-BE-NEXT: vcmp.f32 s5, #0
-; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r1, #1
; CHECK-BE-NEXT: cmp r1, #0
-; CHECK-BE-NEXT: mov.w r2, #0
+; CHECK-BE-NEXT: vcmp.f32 s5, #0
; CHECK-BE-NEXT: cset r1, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r1, r1, #1
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: vcmp.f32 s7, #0
; CHECK-BE-NEXT: rsb.w r3, r3, #0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
+; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: bne .LBB25_5
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: vcmp.f32 s0, #0
-; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
+; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
-; CHECK-LE-NEXT: mov.w r1, #0
-; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r1, #1
; CHECK-LE-NEXT: cmp r1, #0
; CHECK-LE-NEXT: cset r2, ne
; CHECK-LE-NEXT: and r2, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
+; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: bne .LBB26_5
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: vcmp.f32 s4, #0
-; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
-; CHECK-BE-NEXT: vcmp.f32 s5, #0
-; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r1, #1
; CHECK-BE-NEXT: cmp r1, #0
-; CHECK-BE-NEXT: mov.w r2, #0
+; CHECK-BE-NEXT: vcmp.f32 s5, #0
; CHECK-BE-NEXT: cset r1, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r1, r1, #1
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: vcmp.f32 s7, #0
; CHECK-BE-NEXT: rsb.w r3, r3, #0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
+; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: bne .LBB26_5
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #20
; CHECK-LE-NEXT: sub sp, #20
-; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: vcmp.f32 s0, #0
-; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
+; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
-; CHECK-LE-NEXT: mov.w r1, #0
-; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r1, #1
; CHECK-LE-NEXT: cmp r1, #0
; CHECK-LE-NEXT: cset r2, ne
; CHECK-LE-NEXT: and r2, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
+; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: bne .LBB27_5
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: sub sp, #20
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: vcmp.f32 s4, #0
-; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
+; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
-; CHECK-BE-NEXT: vcmp.f32 s5, #0
-; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r1, #1
; CHECK-BE-NEXT: cmp r1, #0
-; CHECK-BE-NEXT: mov.w r2, #0
+; CHECK-BE-NEXT: vcmp.f32 s5, #0
; CHECK-BE-NEXT: cset r1, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r1, r1, #1
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: vcmp.f32 s7, #0
; CHECK-BE-NEXT: rsb.w r3, r3, #0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
+; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: bne .LBB27_5
; CHECK-BE-NEXT: @ %bb.1: @ %else
define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) {
; CHECK-MVE-LABEL: foo_int16_half:
; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vmovx.f16 s14, s0
+; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14
+; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmovx.f16 s4, s3
; CHECK-MVE-NEXT: vmovx.f16 s6, s2
; CHECK-MVE-NEXT: vmovx.f16 s10, s1
-; CHECK-MVE-NEXT: vmovx.f16 s14, s0
-; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4
-; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT: vcvt.s32.f16 s8, s3
-; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT: vcvt.s32.f16 s12, s2
-; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14
; CHECK-MVE-NEXT: vcvt.s32.f16 s5, s1
-; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0
-; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q0[0], r0
; CHECK-MVE-NEXT: vmov r0, s14
; CHECK-MVE-NEXT: vmov.16 q0[1], r0
; CHECK-MVE-NEXT: vmov r0, s5
+; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT: vmov.16 q0[2], r0
; CHECK-MVE-NEXT: vmov r0, s10
+; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT: vmov.16 q0[3], r0
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: vmov.16 q0[4], r0
; CHECK-MVE-NEXT: vmov r0, s6
; CHECK-MVE-NEXT: vmov.16 q0[5], r0
; CHECK-MVE-NEXT: vmov r0, s8
+; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4
; CHECK-MVE-NEXT: vmov.16 q0[6], r0
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov.16 q0[7], r0
define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) {
; CHECK-MVE-LABEL: foo_uint16_half:
; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vmovx.f16 s14, s0
+; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14
+; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmovx.f16 s4, s3
; CHECK-MVE-NEXT: vmovx.f16 s6, s2
; CHECK-MVE-NEXT: vmovx.f16 s10, s1
-; CHECK-MVE-NEXT: vmovx.f16 s14, s0
-; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4
-; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT: vcvt.s32.f16 s8, s3
-; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT: vcvt.s32.f16 s12, s2
-; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14
; CHECK-MVE-NEXT: vcvt.s32.f16 s5, s1
-; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0
-; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q0[0], r0
; CHECK-MVE-NEXT: vmov r0, s14
; CHECK-MVE-NEXT: vmov.16 q0[1], r0
; CHECK-MVE-NEXT: vmov r0, s5
+; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT: vmov.16 q0[2], r0
; CHECK-MVE-NEXT: vmov r0, s10
+; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT: vmov.16 q0[3], r0
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: vmov.16 q0[4], r0
; CHECK-MVE-NEXT: vmov r0, s6
; CHECK-MVE-NEXT: vmov.16 q0[5], r0
; CHECK-MVE-NEXT: vmov r0, s8
+; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4
; CHECK-MVE-NEXT: vmov.16 q0[6], r0
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov.16 q0[7], r0
define arm_aapcs_vfpcc <8 x float> @fpext_8(<8 x half> %src1) {
; CHECK-LABEL: fpext_8:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcvtt.f32.f16 s11, s1
-; CHECK-NEXT: vcvtb.f32.f16 s10, s1
-; CHECK-NEXT: vcvtt.f32.f16 s9, s0
-; CHECK-NEXT: vcvtb.f32.f16 s8, s0
-; CHECK-NEXT: vcvtt.f32.f16 s7, s3
-; CHECK-NEXT: vcvtb.f32.f16 s6, s3
-; CHECK-NEXT: vcvtt.f32.f16 s5, s2
-; CHECK-NEXT: vcvtb.f32.f16 s4, s2
-; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: vmov q2, q0
+; CHECK-NEXT: vcvtt.f32.f16 s3, s9
+; CHECK-NEXT: vcvtt.f32.f16 s7, s11
+; CHECK-NEXT: vcvtb.f32.f16 s2, s9
+; CHECK-NEXT: vcvtb.f32.f16 s6, s11
+; CHECK-NEXT: vcvtt.f32.f16 s1, s8
+; CHECK-NEXT: vcvtt.f32.f16 s5, s10
+; CHECK-NEXT: vcvtb.f32.f16 s0, s8
+; CHECK-NEXT: vcvtb.f32.f16 s4, s10
; CHECK-NEXT: bx lr
entry:
%out = fpext <8 x half> %src1 to <8 x float>
; CHECK-NEXT: vld20.16 {q2, q3}, [r0]
; CHECK-NEXT: vld21.16 {q2, q3}, [r0]
; CHECK-NEXT: vcvtt.f32.f16 s3, s9
-; CHECK-NEXT: vcvtb.f32.f16 s2, s9
-; CHECK-NEXT: vcvtt.f32.f16 s1, s8
-; CHECK-NEXT: vcvtb.f32.f16 s0, s8
; CHECK-NEXT: vcvtt.f32.f16 s7, s11
+; CHECK-NEXT: vcvtb.f32.f16 s2, s9
; CHECK-NEXT: vcvtb.f32.f16 s6, s11
+; CHECK-NEXT: vcvtt.f32.f16 s1, s8
; CHECK-NEXT: vcvtt.f32.f16 s5, s10
+; CHECK-NEXT: vcvtb.f32.f16 s0, s8
; CHECK-NEXT: vcvtb.f32.f16 s4, s10
; CHECK-NEXT: bx lr
entry:
TEST(MachineInstr, HasSideEffects) {
using namespace ARM;
std::set<unsigned> UnpredictableOpcodes = {
- MVE_VCTP8, MVE_VCTP16, MVE_VCTP32, MVE_VCTP64, MVE_VPST,
- MVE_VPTv16i8, MVE_VPTv8i16, MVE_VPTv4i32, MVE_VPTv16i8r, MVE_VPTv8i16r,
- MVE_VPTv4i32r, MVE_VPTv16s8, MVE_VPTv8s16, MVE_VPTv4s32, MVE_VPTv16s8r,
- MVE_VPTv8s16r, MVE_VPTv4s32r, MVE_VPTv16u8, MVE_VPTv8u16, MVE_VPTv4u32,
- MVE_VPTv16u8r, MVE_VPTv8u16r, MVE_VPTv4u32r, MVE_VPTv8f16, MVE_VPTv4f32,
- MVE_VPTv8f16r, MVE_VPTv4f32r, MVE_VADC, MVE_VADCI, MVE_VSBC,
- MVE_VSBCI, MVE_VSHLC,
+ // MVE Instructions
+ MVE_VCTP8,
+ MVE_VCTP16,
+ MVE_VCTP32,
+ MVE_VCTP64,
+ MVE_VPST,
+ MVE_VPTv16i8,
+ MVE_VPTv8i16,
+ MVE_VPTv4i32,
+ MVE_VPTv16i8r,
+ MVE_VPTv8i16r,
+ MVE_VPTv4i32r,
+ MVE_VPTv16s8,
+ MVE_VPTv8s16,
+ MVE_VPTv4s32,
+ MVE_VPTv16s8r,
+ MVE_VPTv8s16r,
+ MVE_VPTv4s32r,
+ MVE_VPTv16u8,
+ MVE_VPTv8u16,
+ MVE_VPTv4u32,
+ MVE_VPTv16u8r,
+ MVE_VPTv8u16r,
+ MVE_VPTv4u32r,
+ MVE_VPTv8f16,
+ MVE_VPTv4f32,
+ MVE_VPTv8f16r,
+ MVE_VPTv4f32r,
+ MVE_VADC,
+ MVE_VADCI,
+ MVE_VSBC,
+ MVE_VSBCI,
+ MVE_VSHLC,
+ // FP Instructions
+ FLDMXIA,
+ FLDMXDB_UPD,
+ FLDMXIA_UPD,
+ FSTMXDB_UPD,
+ FSTMXIA,
+ FSTMXIA_UPD,
+ VLDR_FPCXTNS_off,
+ VLDR_FPCXTNS_post,
+ VLDR_FPCXTNS_pre,
+ VLDR_FPCXTS_off,
+ VLDR_FPCXTS_post,
+ VLDR_FPCXTS_pre,
+ VLDR_FPSCR_NZCVQC_off,
+ VLDR_FPSCR_NZCVQC_post,
+ VLDR_FPSCR_NZCVQC_pre,
+ VLDR_FPSCR_off,
+ VLDR_FPSCR_post,
+ VLDR_FPSCR_pre,
+ VLDR_P0_off,
+ VLDR_P0_post,
+ VLDR_P0_pre,
+ VLDR_VPR_off,
+ VLDR_VPR_post,
+ VLDR_VPR_pre,
+ VLLDM,
+ VLSTM,
+ VMRS,
+ VMRS_FPCXTNS,
+ VMRS_FPCXTS,
+ VMRS_FPEXC,
+ VMRS_FPINST,
+ VMRS_FPINST2,
+ VMRS_FPSCR_NZCVQC,
+ VMRS_FPSID,
+ VMRS_MVFR0,
+ VMRS_MVFR1,
+ VMRS_MVFR2,
+ VMRS_P0,
+ VMRS_VPR,
+ VMSR,
+ VMSR_FPCXTNS,
+ VMSR_FPCXTS,
+ VMSR_FPEXC,
+ VMSR_FPINST,
+ VMSR_FPINST2,
+ VMSR_FPSCR_NZCVQC,
+ VMSR_FPSID,
+ VMSR_P0,
+ VMSR_VPR,
+ VSCCLRMD,
+ VSCCLRMS,
+ VSTR_FPCXTNS_off,
+ VSTR_FPCXTNS_post,
+ VSTR_FPCXTNS_pre,
+ VSTR_FPCXTS_off,
+ VSTR_FPCXTS_post,
+ VSTR_FPCXTS_pre,
+ VSTR_FPSCR_NZCVQC_off,
+ VSTR_FPSCR_NZCVQC_post,
+ VSTR_FPSCR_NZCVQC_pre,
+ VSTR_FPSCR_off,
+ VSTR_FPSCR_post,
+ VSTR_FPSCR_pre,
+ VSTR_P0_off,
+ VSTR_P0_post,
+ VSTR_P0_pre,
+ VSTR_VPR_off,
+ VSTR_VPR_post,
+ VSTR_VPR_pre,
};
LLVMInitializeARMTargetInfo();
for (unsigned Op = 0; Op < ARM::INSTRUCTION_LIST_END; ++Op) {
const MCInstrDesc &Desc = TII->get(Op);
- if ((Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainMVE)
+ if ((Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainMVE &&
+ (Desc.TSFlags & ARMII::DomainMask) != ARMII::DomainVFP)
continue;
if (UnpredictableOpcodes.count(Op))
continue;