Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFPRegs64())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
-; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
+; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4d16sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
define float @float_in_reg(float %a, float %b) {
entry:
; CHECK-NEXT: sbc.w r12, r3, r1
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov r1, r0, d4
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov.f64 d1, d2
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: vmov q2[2], q2[0], r1, lr
; CHECK-NEXT: sbcs r0, r3
; CHECK-NEXT: vmov q2[3], q2[1], r0, r12
-; CHECK-NEXT: vmov.f32 s0, s8
-; CHECK-NEXT: vmov.f32 s4, s10
-; CHECK-NEXT: vmov.f32 s1, s9
-; CHECK-NEXT: vmov.f32 s5, s11
+; CHECK-NEXT: vmov.f64 d0, d4
+; CHECK-NEXT: vmov.f64 d2, d5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
; CHECK-NEXT: sbc.w r12, r3, r1
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov r1, r0, d12
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov.f64 d1, d2
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: add r2, sp, #104
; CHECK-NEXT: vldrw.u32 q5, [r2]
; CHECK-NEXT: vmov q4[3], q4[1], r0, r12
; CHECK-NEXT: vmov r0, r1, d6
; CHECK-NEXT: vmov r2, r3, d11
-; CHECK-NEXT: vmov.f32 s0, s16
-; CHECK-NEXT: vmov.f32 s4, s18
-; CHECK-NEXT: vmov.f32 s1, s17
-; CHECK-NEXT: vmov.f32 s5, s19
+; CHECK-NEXT: vmov.f64 d0, d8
+; CHECK-NEXT: vmov.f64 d2, d9
; CHECK-NEXT: adds.w lr, r2, r0
; CHECK-NEXT: adc.w r12, r3, r1
; CHECK-NEXT: add r1, sp, #88
; CHECK-NEXT: sbc.w r12, r3, r1
; CHECK-NEXT: vmov r2, r3, d5
; CHECK-NEXT: vmov r1, r0, d12
-; CHECK-NEXT: vmov.f32 s10, s16
-; CHECK-NEXT: vmov.f32 s11, s17
+; CHECK-NEXT: vmov.f64 d5, d8
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
; CHECK-NEXT: sbcs r0, r3
; CHECK-NEXT: vmov q3[3], q3[1], r0, r12
-; CHECK-NEXT: vmov.f32 s16, s14
-; CHECK-NEXT: vmov.f32 s8, s12
-; CHECK-NEXT: vmov.f32 s17, s15
-; CHECK-NEXT: vmov.f32 s9, s13
+; CHECK-NEXT: vmov.f64 d8, d7
+; CHECK-NEXT: vmov.f64 d4, d6
; CHECK-NEXT: vmov q3, q4
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
-; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: vmov.f32 s17, s1
+; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vldr d0, .LCPI10_0
; CHECK-NEXT: vmov r4, r6, d1
+; CHECK-NEXT: vmov.f64 d9, d2
; CHECK-NEXT: vmov r2, r11, d0
-; CHECK-NEXT: vmov.f32 s18, s4
-; CHECK-NEXT: vmov.f32 s19, s5
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: str.w r11, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI10_1
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
-; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: vmov.f32 s17, s1
+; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: vldr d0, .LCPI12_0
; CHECK-NEXT: vmov r5, r4, d4
-; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: vmov.f64 d10, d3
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov.f32 s20, s6
-; CHECK-NEXT: vmov.f32 s18, s4
-; CHECK-NEXT: vmov.f32 s22, s2
-; CHECK-NEXT: vmov.f32 s21, s7
-; CHECK-NEXT: vmov.f32 s19, s5
-; CHECK-NEXT: vmov.f32 s23, s3
+; CHECK-NEXT: vmov.f64 d9, d2
+; CHECK-NEXT: vmov.f64 d11, d1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: strd r2, r3, [sp, #20] @ 8-byte Folded Spill
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI13_0
; CHECK-NEXT: vmov r9, r4, d5
+; CHECK-NEXT: vmov.f64 d11, d4
; CHECK-NEXT: vmov r2, r6, d0
-; CHECK-NEXT: vmov.f32 s22, s8
-; CHECK-NEXT: vmov.f32 s20, s6
-; CHECK-NEXT: vmov.f32 s18, s4
-; CHECK-NEXT: vmov.f32 s24, s2
-; CHECK-NEXT: vmov.f32 s23, s9
-; CHECK-NEXT: vmov.f32 s21, s7
-; CHECK-NEXT: vmov.f32 s19, s5
-; CHECK-NEXT: vmov.f32 s25, s3
-; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: vmov.f64 d10, d3
+; CHECK-NEXT: vmov.f64 d9, d2
+; CHECK-NEXT: vmov.f64 d12, d1
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI13_1
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
-; CHECK-NEXT: vmov.f32 s18, s0
-; CHECK-NEXT: vmov.f32 s19, s1
+; CHECK-NEXT: vmov.f64 d9, d0
; CHECK-NEXT: vldr d0, .LCPI10_0
; CHECK-NEXT: vmov r4, r5, d1
+; CHECK-NEXT: vmov.f64 d8, d2
; CHECK-NEXT: vmov r9, r7, d0
-; CHECK-NEXT: vmov.f32 s16, s4
-; CHECK-NEXT: vmov.f32 s17, s5
-; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI10_1
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov.f32 s17, s1
+; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: vldr d0, .LCPI12_0
; CHECK-NEXT: vmov r5, r6, d4
-; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: vmov.f64 d10, d3
; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: vmov.f32 s20, s6
-; CHECK-NEXT: vmov.f32 s18, s4
-; CHECK-NEXT: vmov.f32 s22, s2
-; CHECK-NEXT: vmov.f32 s21, s7
-; CHECK-NEXT: vmov.f32 s19, s5
-; CHECK-NEXT: vmov.f32 s23, s3
+; CHECK-NEXT: vmov.f64 d9, d2
+; CHECK-NEXT: vmov.f64 d11, d1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.f32 s16, s0
+; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vldr d0, .LCPI13_0
; CHECK-NEXT: vmov r5, r6, d5
+; CHECK-NEXT: vmov.f64 d11, d4
; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: vmov.f32 s22, s8
-; CHECK-NEXT: vmov.f32 s20, s6
-; CHECK-NEXT: vmov.f32 s18, s4
-; CHECK-NEXT: vmov.f32 s24, s2
-; CHECK-NEXT: vmov.f32 s23, s9
-; CHECK-NEXT: vmov.f32 s21, s7
-; CHECK-NEXT: vmov.f32 s19, s5
-; CHECK-NEXT: vmov.f32 s25, s3
-; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: vmov.f64 d10, d3
+; CHECK-NEXT: vmov.f64 d9, d2
+; CHECK-NEXT: vmov.f64 d12, d1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: str.w r11, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI13_1
; CHECK-NEXT: mov r7, r0
define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
; CHECK-LABEL: shuffle2_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s5, s3
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d2, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
; CHECK-LABEL: shuffle2_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s5, s3
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d2, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <4 x double> @shuffle4_f64(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: shuffle4_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s8, s6
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s7, s1
-; CHECK-NEXT: vmov.f32 s10, s2
-; CHECK-NEXT: vmov.f32 s11, s3
+; CHECK-NEXT: vmov.f64 d4, d3
+; CHECK-NEXT: vmov.f64 d5, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <4 x double> @shuffle5_f64(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: shuffle5_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s8, s6
-; CHECK-NEXT: vmov.f32 s10, s4
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s11, s5
-; CHECK-NEXT: vmov.f32 s5, s3
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d4, d3
+; CHECK-NEXT: vmov.f64 d5, d2
+; CHECK-NEXT: vmov.f64 d2, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <2 x double> @shuffle6_f64(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: shuffle6_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s6
-; CHECK-NEXT: vmov.f32 s3, s7
+; CHECK-NEXT: vmov.f64 d1, d3
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <2 x double> %src1, <2 x double> %src2, <2 x i32> <i32 0, i32 3>
define arm_aapcs_vfpcc <2 x double> @shuffle7_f64(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: shuffle7_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s6
-; CHECK-NEXT: vmov.f32 s1, s7
+; CHECK-NEXT: vmov.f64 d0, d3
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <2 x double> %src1, <2 x double> %src2, <2 x i32> <i32 3, i32 1>
define arm_aapcs_vfpcc <2 x double> @shuffle8_f64(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: shuffle8_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s6, s2
-; CHECK-NEXT: vmov.f32 s7, s3
+; CHECK-NEXT: vmov.f64 d3, d1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q5, q2
-; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: vmov.f32 s18, s20
-; CHECK-NEXT: vmov.f32 s20, s2
-; CHECK-NEXT: vmov.f32 s10, s12
-; CHECK-NEXT: vmov.f32 s19, s21
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s17, s1
-; CHECK-NEXT: vmov.f32 s21, s3
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f64 d9, d10
+; CHECK-NEXT: vmov.f64 d10, d1
; CHECK-NEXT: vmov q0, q4
-; CHECK-NEXT: vmov.f32 s12, s6
-; CHECK-NEXT: vmov.f32 s11, s13
-; CHECK-NEXT: vmov.f32 s9, s5
-; CHECK-NEXT: vmov.f32 s13, s7
+; CHECK-NEXT: vmov.f64 d5, d6
+; CHECK-NEXT: vmov.f64 d4, d2
+; CHECK-NEXT: vmov.f64 d6, d3
; CHECK-NEXT: vmov q1, q5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i64> @shuffle4_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shuffle4_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s8, s6
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s7, s1
-; CHECK-NEXT: vmov.f32 s10, s2
-; CHECK-NEXT: vmov.f32 s11, s3
+; CHECK-NEXT: vmov.f64 d4, d3
+; CHECK-NEXT: vmov.f64 d5, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <4 x i64> @shuffle5_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shuffle5_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s8, s6
-; CHECK-NEXT: vmov.f32 s10, s4
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s9, s7
-; CHECK-NEXT: vmov.f32 s11, s5
-; CHECK-NEXT: vmov.f32 s5, s3
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d4, d3
+; CHECK-NEXT: vmov.f64 d5, d2
+; CHECK-NEXT: vmov.f64 d2, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <2 x i64> @shuffle6_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shuffle6_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s6
-; CHECK-NEXT: vmov.f32 s3, s7
+; CHECK-NEXT: vmov.f64 d1, d3
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
define arm_aapcs_vfpcc <2 x i64> @shuffle7_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shuffle7_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s6
-; CHECK-NEXT: vmov.f32 s1, s7
+; CHECK-NEXT: vmov.f64 d0, d3
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 1>
define arm_aapcs_vfpcc <2 x i64> @shuffle8_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shuffle8_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s6, s2
-; CHECK-NEXT: vmov.f32 s7, s3
+; CHECK-NEXT: vmov.f64 d3, d1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q5, q2
-; CHECK-NEXT: vmov.f32 s16, s0
-; CHECK-NEXT: vmov.f32 s18, s20
-; CHECK-NEXT: vmov.f32 s20, s2
-; CHECK-NEXT: vmov.f32 s10, s12
-; CHECK-NEXT: vmov.f32 s19, s21
-; CHECK-NEXT: vmov.f32 s8, s4
-; CHECK-NEXT: vmov.f32 s17, s1
-; CHECK-NEXT: vmov.f32 s21, s3
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f64 d9, d10
+; CHECK-NEXT: vmov.f64 d10, d1
; CHECK-NEXT: vmov q0, q4
-; CHECK-NEXT: vmov.f32 s12, s6
-; CHECK-NEXT: vmov.f32 s11, s13
-; CHECK-NEXT: vmov.f32 s9, s5
-; CHECK-NEXT: vmov.f32 s13, s7
+; CHECK-NEXT: vmov.f64 d5, d6
+; CHECK-NEXT: vmov.f64 d4, d2
+; CHECK-NEXT: vmov.f64 d6, d3
; CHECK-NEXT: vmov q1, q5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
; CHECK-LABEL: extract_f64_1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s2
-; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: vmov.f64 d0, d1
; CHECK-NEXT: bx lr
entry:
%res = extractelement <2 x double> %a, i32 1
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_45670123:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s5, s3
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d2, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0123cdef:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s6
-; CHECK-NEXT: vmov.f32 s3, s7
+; CHECK-NEXT: vmov.f64 d1, d3
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_45670123:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s5, s3
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d2, d1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0123cdef:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s6
-; CHECK-NEXT: vmov.f32 s3, s7
+; CHECK-NEXT: vmov.f64 d1, d3
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
; CHECK-LABEL: vdup_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s0
-; CHECK-NEXT: vmov.f32 s3, s1
+; CHECK-NEXT: vmov.f64 d1, d0
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <2 x double> undef, double %src, i32 0
define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
; CHECK-LABEL: vduplane_i64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s2
-; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: vmov.f64 d0, d1
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
; CHECK-LABEL: vduplane_f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s2
-; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: vmov.f64 d0, d1
; CHECK-NEXT: bx lr
entry:
%out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
define arm_aapcs_vfpcc <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) {
; CHECK-LABEL: bitcast_v2f64_v2i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f32 s2, s0
-; CHECK-NEXT: vmov.f32 s3, s1
+; CHECK-NEXT: vmov.f64 d1, d0
; CHECK-NEXT: bx lr
%b = bitcast <2 x double> %a to <2 x i64>
%r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
define arm_aapcs_vfpcc <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a) {
; CHECK-LABEL: bitcast_v8i16_v2i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f32 s2, s0
-; CHECK-NEXT: vmov.f32 s3, s1
+; CHECK-NEXT: vmov.f64 d1, d0
; CHECK-NEXT: bx lr
%b = bitcast <8 x i16> %a to <2 x i64>
%r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov.f64 d1, d2
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.f32 s2, s4
-; CHECKBE-NEXT: vmov.f32 s3, s5
+; CHECKBE-NEXT: vmov.f64 d1, d2
; CHECKBE-NEXT: bx lr
entry:
%out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.f32 s6, s0
-; CHECKBE-NEXT: vmov.f32 s7, s1
+; CHECKBE-NEXT: vmov.f64 d3, d0
; CHECKBE-NEXT: vmov q0, q1
; CHECKBE-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s6
-; CHECK-NEXT: vmov.f32 s3, s7
+; CHECK-NEXT: vmov.f64 d1, d3
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.f32 s2, s6
-; CHECKBE-NEXT: vmov.f32 s3, s7
+; CHECKBE-NEXT: vmov.f64 d1, d3
; CHECKBE-NEXT: bx lr
entry:
%out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, s6
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s5, s7
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d2, d3
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.f32 s4, s6
-; CHECKBE-NEXT: vmov.f32 s6, s0
-; CHECKBE-NEXT: vmov.f32 s5, s7
-; CHECKBE-NEXT: vmov.f32 s7, s1
+; CHECKBE-NEXT: vmov.f64 d2, d3
+; CHECKBE-NEXT: vmov.f64 d3, d0
; CHECKBE-NEXT: vmov q0, q1
; CHECKBE-NEXT: bx lr
entry:
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s2
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.f32 s1, s3
-; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov.f64 d0, d1
+; CHECK-NEXT: vmov.f64 d1, d2
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.f32 s0, s2
-; CHECKBE-NEXT: vmov.f32 s2, s4
-; CHECKBE-NEXT: vmov.f32 s1, s3
-; CHECKBE-NEXT: vmov.f32 s3, s5
+; CHECKBE-NEXT: vmov.f64 d0, d1
+; CHECKBE-NEXT: vmov.f64 d1, d2
; CHECKBE-NEXT: bx lr
entry:
%out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s6, s2
-; CHECK-NEXT: vmov.f32 s7, s3
+; CHECK-NEXT: vmov.f64 d3, d1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.f32 s6, s2
-; CHECKBE-NEXT: vmov.f32 s7, s3
+; CHECKBE-NEXT: vmov.f64 d3, d1
; CHECKBE-NEXT: vmov q0, q1
; CHECKBE-NEXT: bx lr
entry:
define arm_aapcs_vfpcc void @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_t1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov.f64 d1, d2
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc void @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_t2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc void @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, s6
-; CHECK-NEXT: vmov.f32 s3, s7
+; CHECK-NEXT: vmov.f64 d1, d3
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc void @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, s6
-; CHECK-NEXT: vmov.f32 s6, s0
-; CHECK-NEXT: vmov.f32 s5, s7
-; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov.f64 d2, d3
+; CHECK-NEXT: vmov.f64 d3, d0
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc void @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s0, s2
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.f32 s1, s3
-; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov.f64 d0, d1
+; CHECK-NEXT: vmov.f64 d1, d2
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc void @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s6, s2
-; CHECK-NEXT: vmov.f32 s7, s3
+; CHECK-NEXT: vmov.f64 d3, d1
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vmov.f32 s16, s2
-; CHECK-NEXT: vmov.f32 s18, s0
-; CHECK-NEXT: vmov.f32 s17, s3
-; CHECK-NEXT: vmov.f32 s19, s1
+; CHECK-NEXT: vmov.f64 d8, d1
+; CHECK-NEXT: vmov.f64 d9, d0
; CHECK-NEXT: bl sqrt
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov r0, r1, d0