From 0a762ec1b09d96734a3462f8792a5574d089b24d Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 28 May 2023 19:12:45 +0100 Subject: [PATCH] [ARM] Allow D-reg copies to use VMOVD with fpregs64 This instruction should be available with MVE, where we have D regs, not requiring the full FP64 target feature. --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +- llvm/test/CodeGen/Thumb2/aapcs.ll | 6 +- .../Thumb2/mve-complex-deinterleaving-i64-add.ll | 27 ++--- llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll | 43 +++----- llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll | 43 +++----- llvm/test/CodeGen/Thumb2/mve-shuffle.ll | 111 +++++++-------------- llvm/test/CodeGen/Thumb2/mve-shufflemov.ll | 18 ++-- llvm/test/CodeGen/Thumb2/mve-vdup.ll | 15 +-- llvm/test/CodeGen/Thumb2/mve-vmovn.ll | 48 +++------ llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll | 24 ++--- llvm/test/CodeGen/Thumb2/vmovdrroffset.ll | 6 +- 11 files changed, 119 insertions(+), 224 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2ffa540..792d486 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -913,7 +913,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64()) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFPRegs64()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy; diff --git a/llvm/test/CodeGen/Thumb2/aapcs.ll b/llvm/test/CodeGen/Thumb2/aapcs.ll index 651b994..b8a93d4 100644 --- a/llvm/test/CodeGen/Thumb2/aapcs.ll +++ b/llvm/test/CodeGen/Thumb2/aapcs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4d16sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP define float @float_in_reg(float %a, float %b) { entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll index dea6990..157ca2a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll @@ -57,16 +57,13 @@ define arm_aapcs_vfpcc <4 x i64> @complex_add_v4i64(<4 x i64> %a, <4 x i64> %b) ; CHECK-NEXT: sbc.w r12, r3, r1 ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov r1, r0, d4 -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f64 d1, d2 ; CHECK-NEXT: subs r1, r1, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r1, lr ; CHECK-NEXT: sbcs r0, r3 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r12 -; CHECK-NEXT: vmov.f32 s0, s8 -; CHECK-NEXT: vmov.f32 s4, s10 -; CHECK-NEXT: vmov.f32 s1, s9 -; CHECK-NEXT: vmov.f32 s5, s11 +; CHECK-NEXT: vmov.f64 d0, d4 +; CHECK-NEXT: vmov.f64 d2, d5 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r7, pc} entry: @@ -109,8 +106,7 @@ define arm_aapcs_vfpcc <8 x i64> @complex_add_v8i64(<8 x i64> %a, <8 x i64> %b) ; CHECK-NEXT: sbc.w r12, r3, r1 ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov r1, r0, d12 -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f64 d1, d2 ; CHECK-NEXT: subs r1, r1, r2 ; CHECK-NEXT: add r2, sp, #104 ; CHECK-NEXT: vldrw.u32 q5, [r2] @@ -119,10 +115,8 @@ define arm_aapcs_vfpcc <8 x i64> @complex_add_v8i64(<8 x i64> %a, <8 x i64> %b) ; CHECK-NEXT: vmov q4[3], q4[1], r0, r12 ; CHECK-NEXT: vmov r0, r1, d6 ; CHECK-NEXT: vmov r2, r3, d11 -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: vmov.f32 s4, s18 -; CHECK-NEXT: vmov.f32 s1, s17 -; CHECK-NEXT: vmov.f32 s5, s19 +; CHECK-NEXT: vmov.f64 d0, d8 +; CHECK-NEXT: vmov.f64 d2, d9 ; CHECK-NEXT: adds.w lr, r2, r0 ; CHECK-NEXT: adc.w r12, r3, r1 ; CHECK-NEXT: add r1, sp, #88 @@ -139,16 +133,13 @@ define arm_aapcs_vfpcc <8 x i64> @complex_add_v8i64(<8 x i64> %a, <8 x i64> %b) ; CHECK-NEXT: sbc.w r12, r3, r1 ; CHECK-NEXT: vmov r2, r3, d5 ; CHECK-NEXT: vmov r1, r0, d12 -; CHECK-NEXT: vmov.f32 s10, s16 -; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vmov.f64 d5, d8 ; CHECK-NEXT: subs r1, r1, r2 ; CHECK-NEXT: vmov q3[2], q3[0], r1, lr ; CHECK-NEXT: sbcs r0, r3 ; CHECK-NEXT: vmov q3[3], q3[1], r0, r12 -; CHECK-NEXT: vmov.f32 s16, s14 -; CHECK-NEXT: vmov.f32 s8, s12 -; CHECK-NEXT: vmov.f32 s17, s15 -; CHECK-NEXT: vmov.f32 s9, s13 +; CHECK-NEXT: vmov.f64 d8, d7 +; CHECK-NEXT: vmov.f64 d4, d6 ; CHECK-NEXT: vmov q3, q4 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 3ca01cf..bd66c90 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -490,18 +490,16 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vldr d0, .LCPI10_0 ; CHECK-NEXT: vmov r4, r6, d1 +; CHECK-NEXT: vmov.f64 d9, d2 ; CHECK-NEXT: vmov r2, r11, d0 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s19, s5 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: str.w r11, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI10_1 ; CHECK-NEXT: mov r1, r6 @@ -791,19 +789,15 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov.f32 s16, s0 +; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: vldr d0, .LCPI12_0 ; CHECK-NEXT: vmov r5, r4, d4 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: vmov.f64 d10, d3 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: vmov.f32 s20, s6 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s22, s2 -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vmov.f32 s19, s5 -; CHECK-NEXT: vmov.f32 s23, s3 +; CHECK-NEXT: vmov.f64 d9, d2 +; CHECK-NEXT: vmov.f64 d11, d1 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: strd r2, r3, [sp, #20] @ 8-byte Folded Spill @@ -1000,25 +994,20 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov.f32 s16, s0 +; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmov.f32 s17, s1 ; CHECK-NEXT: vldr d0, .LCPI13_0 ; CHECK-NEXT: vmov r9, r4, d5 +; CHECK-NEXT: vmov.f64 d11, d4 ; CHECK-NEXT: vmov r2, r6, d0 -; CHECK-NEXT: vmov.f32 s22, s8 -; CHECK-NEXT: vmov.f32 s20, s6 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s24, s2 -; CHECK-NEXT: vmov.f32 s23, s9 -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vmov.f32 s19, s5 -; CHECK-NEXT: vmov.f32 s25, s3 -; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: vmov.f64 d10, d3 +; CHECK-NEXT: vmov.f64 d9, d2 +; CHECK-NEXT: vmov.f64 d12, d1 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI13_1 ; CHECK-NEXT: mov r1, r4 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index 8ea12bd..0fdd4a2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -432,19 +432,17 @@ define arm_aapcs_vfpcc <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: vmov.f32 s18, s0 -; CHECK-NEXT: vmov.f32 s19, s1 +; CHECK-NEXT: vmov.f64 d9, d0 ; CHECK-NEXT: vldr d0, .LCPI10_0 ; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: vmov.f64 d8, d2 ; CHECK-NEXT: vmov r9, r7, d0 -; CHECK-NEXT: vmov.f32 s16, s4 -; CHECK-NEXT: vmov.f32 s17, s5 -; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI10_1 ; CHECK-NEXT: mov r1, r5 @@ -665,19 +663,15 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov.f32 s16, s0 +; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: vldr d0, .LCPI12_0 ; CHECK-NEXT: vmov r5, r6, d4 -; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: vmov.f64 d10, d3 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: vmov.f32 s20, s6 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s22, s2 -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vmov.f32 s19, s5 -; CHECK-NEXT: vmov.f32 s23, s3 +; CHECK-NEXT: vmov.f64 d9, d2 +; CHECK-NEXT: vmov.f64 d11, d1 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill @@ -834,25 +828,20 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov.f32 s16, s0 +; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: vmov.f32 s17, s1 ; CHECK-NEXT: vldr d0, .LCPI13_0 ; CHECK-NEXT: vmov r5, r6, d5 +; CHECK-NEXT: vmov.f64 d11, d4 ; CHECK-NEXT: vmov r11, r3, d0 -; CHECK-NEXT: vmov.f32 s22, s8 -; CHECK-NEXT: vmov.f32 s20, s6 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s24, s2 -; CHECK-NEXT: vmov.f32 s23, s9 -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vmov.f32 s19, s5 -; CHECK-NEXT: vmov.f32 s25, s3 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: vmov.f64 d10, d3 +; CHECK-NEXT: vmov.f64 d9, d2 +; CHECK-NEXT: vmov.f64 d12, d1 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: str.w r11, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI13_1 ; CHECK-NEXT: mov r7, r0 diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll index 93a0588..3f2310b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -921,10 +921,8 @@ entry: define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) { ; CHECK-LABEL: shuffle2_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d2, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -1381,10 +1379,8 @@ entry: define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) { ; CHECK-LABEL: shuffle2_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d2, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -1404,12 +1400,9 @@ entry: define arm_aapcs_vfpcc <4 x double> @shuffle4_f64(<2 x double> %src1, <2 x double> %src2) { ; CHECK-LABEL: shuffle4_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s9, s7 -; CHECK-NEXT: vmov.f32 s7, s1 -; CHECK-NEXT: vmov.f32 s10, s2 -; CHECK-NEXT: vmov.f32 s11, s3 +; CHECK-NEXT: vmov.f64 d4, d3 +; CHECK-NEXT: vmov.f64 d5, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: bx lr entry: @@ -1419,14 +1412,10 @@ entry: define arm_aapcs_vfpcc <4 x double> @shuffle5_f64(<2 x double> %src1, <2 x double> %src2) { ; CHECK-LABEL: shuffle5_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s4 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s9, s7 -; CHECK-NEXT: vmov.f32 s11, s5 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d4, d3 +; CHECK-NEXT: vmov.f64 d5, d2 +; CHECK-NEXT: vmov.f64 d2, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: bx lr entry: @@ -1436,8 +1425,7 @@ entry: define arm_aapcs_vfpcc <2 x double> @shuffle6_f64(<2 x double> %src1, <2 x double> %src2) { ; CHECK-LABEL: shuffle6_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 +; CHECK-NEXT: vmov.f64 d1, d3 ; CHECK-NEXT: bx lr entry: %out = shufflevector <2 x double> %src1, <2 x double> %src2, <2 x i32> @@ -1446,8 +1434,7 @@ entry: define arm_aapcs_vfpcc <2 x double> @shuffle7_f64(<2 x double> %src1, <2 x double> %src2) { ; CHECK-LABEL: shuffle7_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s6 -; CHECK-NEXT: vmov.f32 s1, s7 +; CHECK-NEXT: vmov.f64 d0, d3 ; CHECK-NEXT: bx lr entry: %out = shufflevector <2 x double> %src1, <2 x double> %src2, <2 x i32> @@ -1456,8 +1443,7 @@ entry: define arm_aapcs_vfpcc <2 x double> @shuffle8_f64(<2 x double> %src1, <2 x double> %src2) { ; CHECK-LABEL: shuffle8_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f64 d3, d1 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -1470,19 +1456,13 @@ define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x doubl ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q5, q2 -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s18, s20 -; CHECK-NEXT: vmov.f32 s20, s2 -; CHECK-NEXT: vmov.f32 s10, s12 -; CHECK-NEXT: vmov.f32 s19, s21 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s17, s1 -; CHECK-NEXT: vmov.f32 s21, s3 +; CHECK-NEXT: vmov.f64 d8, d0 +; CHECK-NEXT: vmov.f64 d9, d10 +; CHECK-NEXT: vmov.f64 d10, d1 ; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vmov.f32 s12, s6 -; CHECK-NEXT: vmov.f32 s11, s13 -; CHECK-NEXT: vmov.f32 s9, s5 -; CHECK-NEXT: vmov.f32 s13, s7 +; CHECK-NEXT: vmov.f64 d5, d6 +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f64 d6, d3 ; CHECK-NEXT: vmov q1, q5 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: bx lr @@ -1497,12 +1477,9 @@ entry: define arm_aapcs_vfpcc <4 x i64> @shuffle4_i64(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: shuffle4_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s9, s7 -; CHECK-NEXT: vmov.f32 s7, s1 -; CHECK-NEXT: vmov.f32 s10, s2 -; CHECK-NEXT: vmov.f32 s11, s3 +; CHECK-NEXT: vmov.f64 d4, d3 +; CHECK-NEXT: vmov.f64 d5, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: bx lr entry: @@ -1512,14 +1489,10 @@ entry: define arm_aapcs_vfpcc <4 x i64> @shuffle5_i64(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: shuffle5_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s4 -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s9, s7 -; CHECK-NEXT: vmov.f32 s11, s5 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d4, d3 +; CHECK-NEXT: vmov.f64 d5, d2 +; CHECK-NEXT: vmov.f64 d2, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: bx lr entry: @@ -1529,8 +1502,7 @@ entry: define arm_aapcs_vfpcc <2 x i64> @shuffle6_i64(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: shuffle6_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 +; CHECK-NEXT: vmov.f64 d1, d3 ; CHECK-NEXT: bx lr entry: %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> @@ -1539,8 +1511,7 @@ entry: define arm_aapcs_vfpcc <2 x i64> @shuffle7_i64(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: shuffle7_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s6 -; CHECK-NEXT: vmov.f32 s1, s7 +; CHECK-NEXT: vmov.f64 d0, d3 ; CHECK-NEXT: bx lr entry: %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> @@ -1549,8 +1520,7 @@ entry: define arm_aapcs_vfpcc <2 x i64> @shuffle8_i64(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: shuffle8_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f64 d3, d1 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -1563,19 +1533,13 @@ define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q5, q2 -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s18, s20 -; CHECK-NEXT: vmov.f32 s20, s2 -; CHECK-NEXT: vmov.f32 s10, s12 -; CHECK-NEXT: vmov.f32 s19, s21 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s17, s1 -; CHECK-NEXT: vmov.f32 s21, s3 +; CHECK-NEXT: vmov.f64 d8, d0 +; CHECK-NEXT: vmov.f64 d9, d10 +; CHECK-NEXT: vmov.f64 d10, d1 ; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vmov.f32 s12, s6 -; CHECK-NEXT: vmov.f32 s11, s13 -; CHECK-NEXT: vmov.f32 s9, s5 -; CHECK-NEXT: vmov.f32 s13, s7 +; CHECK-NEXT: vmov.f64 d5, d6 +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f64 d6, d3 ; CHECK-NEXT: vmov q1, q5 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: bx lr @@ -1810,8 +1774,7 @@ entry: define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) { ; CHECK-LABEL: extract_f64_1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s2 -; CHECK-NEXT: vmov.f32 s1, s3 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr entry: %res = extractelement <2 x double> %a, i32 1 diff --git a/llvm/test/CodeGen/Thumb2/mve-shufflemov.ll b/llvm/test/CodeGen/Thumb2/mve-shufflemov.ll index 6ce7550..4812ae5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-shufflemov.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shufflemov.ll @@ -7,10 +7,8 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) { ; CHECK-LABEL: shuffle_i16_45670123: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d2, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -58,8 +56,7 @@ entry: define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) { ; CHECK-LABEL: shuffle_i16_0123cdef: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 +; CHECK-NEXT: vmov.f64 d1, d3 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> @@ -278,10 +275,8 @@ entry: define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) { ; CHECK-LABEL: shuffle_f16_45670123: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s2 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s5, s3 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d2, d1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -329,8 +324,7 @@ entry: define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) { ; CHECK-LABEL: shuffle_f16_0123cdef: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 +; CHECK-NEXT: vmov.f64 d1, d3 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll index 9ba3866..b87cc83 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll @@ -208,8 +208,7 @@ entry: define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) { ; CHECK-LABEL: vdup_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s0 -; CHECK-NEXT: vmov.f32 s3, s1 +; CHECK-NEXT: vmov.f64 d1, d0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <2 x double> undef, double %src, i32 0 @@ -279,8 +278,7 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) { ; CHECK-LABEL: vduplane_i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s2 -; CHECK-NEXT: vmov.f32 s1, s3 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr entry: %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> @@ -328,8 +326,7 @@ entry: define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) { ; CHECK-LABEL: vduplane_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s2 -; CHECK-NEXT: vmov.f32 s1, s3 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr entry: %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> @@ -506,8 +503,7 @@ define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_v2f64(i64 %a) { define arm_aapcs_vfpcc <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) { ; CHECK-LABEL: bitcast_v2f64_v2i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s2, s0 -; CHECK-NEXT: vmov.f32 s3, s1 +; CHECK-NEXT: vmov.f64 d1, d0 ; CHECK-NEXT: bx lr %b = bitcast <2 x double> %a to <2 x i64> %r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer @@ -517,8 +513,7 @@ define arm_aapcs_vfpcc <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) { define arm_aapcs_vfpcc <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a) { ; CHECK-LABEL: bitcast_v8i16_v2i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s2, s0 -; CHECK-NEXT: vmov.f32 s3, s1 +; CHECK-NEXT: vmov.f64 d1, d0 ; CHECK-NEXT: bx lr %b = bitcast <8 x i16> %a to <2 x i64> %r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovn.ll b/llvm/test/CodeGen/Thumb2/mve-vmovn.ll index b005cb9..93848aa 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovn.ll @@ -192,14 +192,12 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: vmovn64_t1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f64 d1, d2 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn64_t1: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.f32 s2, s4 -; CHECKBE-NEXT: vmov.f32 s3, s5 +; CHECKBE-NEXT: vmov.f64 d1, d2 ; CHECKBE-NEXT: bx lr entry: %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> @@ -209,15 +207,13 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: vmovn64_t2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn64_t2: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.f32 s6, s0 -; CHECKBE-NEXT: vmov.f32 s7, s1 +; CHECKBE-NEXT: vmov.f64 d3, d0 ; CHECKBE-NEXT: vmov q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -228,14 +224,12 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: vmovn64_b1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 +; CHECK-NEXT: vmov.f64 d1, d3 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn64_b1: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.f32 s2, s6 -; CHECKBE-NEXT: vmov.f32 s3, s7 +; CHECKBE-NEXT: vmov.f64 d1, d3 ; CHECKBE-NEXT: bx lr entry: %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> @@ -245,19 +239,15 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: vmovn64_b2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s6 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s5, s7 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d2, d3 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn64_b2: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.f32 s4, s6 -; CHECKBE-NEXT: vmov.f32 s6, s0 -; CHECKBE-NEXT: vmov.f32 s5, s7 -; CHECKBE-NEXT: vmov.f32 s7, s1 +; CHECKBE-NEXT: vmov.f64 d2, d3 +; CHECKBE-NEXT: vmov.f64 d3, d0 ; CHECKBE-NEXT: vmov q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -268,18 +258,14 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: vmovn64_b3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s2 -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s1, s3 -; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f64 d0, d1 +; CHECK-NEXT: vmov.f64 d1, d2 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn64_b3: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.f32 s0, s2 -; CHECKBE-NEXT: vmov.f32 s2, s4 -; CHECKBE-NEXT: vmov.f32 s1, s3 -; CHECKBE-NEXT: vmov.f32 s3, s5 +; CHECKBE-NEXT: vmov.f64 d0, d1 +; CHECKBE-NEXT: vmov.f64 d1, d2 ; CHECKBE-NEXT: bx lr entry: %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> @@ -289,15 +275,13 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: vmovn64_b4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f64 d3, d1 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr ; ; CHECKBE-LABEL: vmovn64_b4: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.f32 s6, s2 -; CHECKBE-NEXT: vmov.f32 s7, s3 +; CHECKBE-NEXT: vmov.f64 d3, d1 ; CHECKBE-NEXT: vmov q0, q1 ; CHECKBE-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll b/llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll index 5850ad1..2428cd8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll @@ -110,8 +110,7 @@ entry: define arm_aapcs_vfpcc void @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) { ; CHECK-LABEL: vmovn64_t1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f64 d1, d2 ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: @@ -123,8 +122,7 @@ entry: define arm_aapcs_vfpcc void @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) { ; CHECK-LABEL: vmovn64_t2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -136,8 +134,7 @@ entry: define arm_aapcs_vfpcc void @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) { ; CHECK-LABEL: vmovn64_b1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 +; CHECK-NEXT: vmov.f64 d1, d3 ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: @@ -149,10 +146,8 @@ entry: define arm_aapcs_vfpcc void @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) { ; CHECK-LABEL: vmovn64_b2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s6 -; CHECK-NEXT: vmov.f32 s6, s0 -; CHECK-NEXT: vmov.f32 s5, s7 -; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f64 d2, d3 +; CHECK-NEXT: vmov.f64 d3, d0 ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -164,10 +159,8 @@ entry: define arm_aapcs_vfpcc void @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) { ; CHECK-LABEL: vmovn64_b3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s0, s2 -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s1, s3 -; CHECK-NEXT: vmov.f32 s3, s5 +; CHECK-NEXT: vmov.f64 d0, d1 +; CHECK-NEXT: vmov.f64 d1, d2 ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: @@ -179,8 +172,7 @@ entry: define arm_aapcs_vfpcc void @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) { ; CHECK-LABEL: vmovn64_b4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vmov.f32 s7, s3 +; CHECK-NEXT: vmov.f64 d3, d1 ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll b/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll index 9d0c9c0..56f1ead 100644 --- a/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll +++ b/llvm/test/CodeGen/Thumb2/vmovdrroffset.ll @@ -13,10 +13,8 @@ define arm_aapcs_vfpcc double @zero(double %a, double %b, double %c) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov.f32 s16, s2 -; CHECK-NEXT: vmov.f32 s18, s0 -; CHECK-NEXT: vmov.f32 s17, s3 -; CHECK-NEXT: vmov.f32 s19, s1 +; CHECK-NEXT: vmov.f64 d8, d1 +; CHECK-NEXT: vmov.f64 d9, d0 ; CHECK-NEXT: bl sqrt ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov r0, r1, d0 -- 2.7.4