From 7eb95d672dbf302a422ae4dbb24dd7cc583b65df Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 12:37:05 +0000
Subject: [PATCH] [ARM] Introduce separate features for FP registers

The MVE extension in Arm v8.1-M permits the use of some move, load and
store instructions which access the FP registers, even if there's no
actual FP support in the processor (in particular, if you have the
integer-only version of MVE).

Therefore, we need separate subtarget features to condition those
instructions on, which are implied by both FP and MVE but are not part
of either.

Patch mostly by Simon Tatham.

Differential Revision: https://reviews.llvm.org/D60694

llvm-svn: 362088
---
 llvm/lib/Target/ARM/ARM.td             |  25 ++++++-
 llvm/lib/Target/ARM/ARMInstrNEON.td    |   2 +-
 llvm/lib/Target/ARM/ARMInstrVFP.td     |  45 +++++++----
 llvm/lib/Target/ARM/ARMPredicates.td   |   9 +++
 llvm/lib/Target/ARM/ARMSubtarget.h     |   6 ++
 llvm/test/MC/ARM/fullfp16-neg.s        |  20 ++---
 llvm/test/MC/ARM/mve-fp-registers.s    | 133 +++++++++++++++++++++++++++++++++
 llvm/test/MC/ARM/single-precision-fp.s |   4 +-
 llvm/test/MC/ARM/vmrs_vmsr.s           |  12 +--
 9 files changed, 220 insertions(+), 36 deletions(-)
 create mode 100644 llvm/test/MC/ARM/mve-fp-registers.s
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 9af350c..62cd79c 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -32,9 +32,26 @@ def ModeSoftFloat         : SubtargetFeature<"soft-float","UseSoftFloat",
 //
 
 // Floating Point, HW Division and Neon Support
+
+// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only
+// version).
+def FeatureFPRegs         : SubtargetFeature<"fpregs", "HasFPRegs", "true",
+                                             "Enable FP registers">;
+
+// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16
+// extension) and MVE (even in the integer-only version).
+def FeatureFPRegs16       : SubtargetFeature<"fpregs16", "HasFPRegs16", "true",
+                                             "Enable 16-bit FP registers",
+                                             [FeatureFPRegs]>;
+
+def FeatureFPRegs64       : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
+                                             "Enable 64-bit FP registers",
+                                             [FeatureFPRegs]>;
+
 def FeatureFP64           : SubtargetFeature<"fp64", "HasFP64", "true",
                                              "Floating point unit supports "
-                                             "double precision">;
+                                             "double precision",
+                                             [FeatureFPRegs64]>;
 
 def FeatureD32            : SubtargetFeature<"d32", "HasD32", "true",
                                              "Extend FP to 32 double registers">;
@@ -63,7 +80,9 @@ multiclass VFPver<string name, string query, string description,
         !cast<SubtargetFeature>(NAME # "_SP")]>;
 }
 
-defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">;
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions",
+                         [], [FeatureFPRegs]>;
+
 defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
                          [FeatureVFP2]>;
 
@@ -84,7 +103,7 @@ defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
 def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                              "Enable full half-precision "
                                              "floating point",
-                                             [FeatureFPARMv8_D16_SP]>;
+                                             [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
 
 def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
                                              "Enable full half-precision "
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 0c4e765..3aa4431 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6179,7 +6179,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                           IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                           [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                            imm:$lane))]>,
-                Requires<[HasVFP2, HasFastVGETLNi32]> {
+                Requires<[HasFPRegs, HasFastVGETLNi32]> {
   let Inst{21} = lane{0};
 }
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index d1b32f5..88405a8 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -141,11 +141,13 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
-                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>,
+            Requires<[HasFPRegs]>;
 
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
-                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
+                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   let D = VFPNeonDomain;
@@ -155,17 +157,19 @@ let isUnpredicable = 1 in
 def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                  [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
 
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
-                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
+            Requires<[HasFPRegs]>;
 
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
-                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
+                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   let D = VFPNeonDomain;
@@ -175,7 +179,7 @@ let isUnpredicable = 1 in
 def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                  [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
@@ -183,6 +187,7 @@ def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
 
 multiclass vfp_ldst_mult<string asm, bit L_bit,
                          InstrItinClass itin, InstrItinClass itin_upd> {
+  let Predicates = [HasFPRegs] in {
   // Double Precision
   def DIA :
     AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -250,6 +255,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     // VFP pipelines.
     let D = VFPNeonDomain;
   }
+  }
 }
 
 let hasSideEffects = 0 in {
@@ -318,6 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
 // However, there is no UAL syntax for them, so we keep them around for
 // (dis)assembly only.
 multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+  let Predicates = [HasFPRegs] in {
   // Unknown precision
   def XIA :
     AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -340,6 +347,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> {
     let Inst{21}    = 1;            // Writeback
     let Inst{20}    = L_bit;
   }
+  }
 }
 
 defm FLDM : vfp_ldstx_mult<"fldm", 1>;
@@ -1031,11 +1039,13 @@ let hasSideEffects = 0 in {
 let isMoveReg = 1 in {
 def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+             Requires<[HasFPRegs64]>;
 
 def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
-                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+             Requires<[HasFPRegs]>;
 } // isMoveReg
 
 let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
@@ -1060,6 +1070,7 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
                       (outs GPR:$Rt), (ins SPR:$Sn),
                       IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
                       [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+             Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<4> Rt;
@@ -1083,7 +1094,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
                       (outs SPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
                       [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
-             Requires<[HasVFP2, UseVMOVSR]>,
+             Requires<[HasFPRegs, UseVMOVSR]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Sn;
@@ -1109,6 +1120,7 @@ def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
                         (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
                         IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                  [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Dm;
@@ -1137,6 +1149,7 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                       (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
                  IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
                  [/* For disassembly only; pattern left blank */]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
   bits<5> src1;
   bits<4> Rt;
@@ -1164,6 +1177,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
                       (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
                       IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
                       [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Dm;
@@ -1208,6 +1222,7 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 [/* For disassembly only; pattern left blank */]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> dst1;
@@ -1234,7 +1249,7 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
                       (outs GPR:$Rt), (ins HPR:$Sn),
                       IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
                       [(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
-             Requires<[HasFullFP16]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<4> Rt;
@@ -1256,7 +1271,7 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
                       (outs HPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
                       [(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
-             Requires<[HasFullFP16]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Sn;
@@ -2286,13 +2301,14 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
 // to APSR.
-let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
+    Rt = 0b1111 /* apsr_nzcv */ in
 def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
                         "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  // Application level FPSCR -> GPR
- let hasSideEffects = 1, Uses = [FPSCR] in
+ let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
  def VMRS :  MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
                         "vmrs", "\t$Rt, fpscr",
                         [(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
@@ -2341,6 +2357,7 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  let Defs = [FPSCR] in {
+   let Predicates = [HasFPRegs] in
    // Application level GPR -> FPSCR
    def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
                        "vmsr", "\tfpscr, $src",
@@ -2474,7 +2491,7 @@ def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
 def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
 
 
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : InstAlias<"fmstat${p}", (FMSTAT pred:$p), 0>, Requires<[HasFPRegs]>;
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
                     (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
 def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
index 37c3098..ab4ed39 100644
--- a/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -26,6 +26,15 @@ def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
 def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
                                  AssemblerPredicate<"HasV8MMainlineOps",
                                                     "armv8m.main">;
+def HasFPRegs        : Predicate<"Subtarget->hasFPRegs()">,
+                                 AssemblerPredicate<"FeatureFPRegs",
+                                                    "fp registers">;
+def HasFPRegs16      : Predicate<"Subtarget->hasFPRegs16()">,
+                                 AssemblerPredicate<"FeatureFPRegs16",
+                                                    "16-bit fp registers">;
+def HasFPRegs64      : Predicate<"Subtarget->hasFPRegs64()">,
+                                 AssemblerPredicate<"FeatureFPRegs64",
+                                                    "64-bit fp registers">;
 def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
                                  AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index abedc6f..03bea35 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -165,6 +165,9 @@ protected:
   bool HasVFPv4 = false;
   bool HasFPARMv8 = false;
   bool HasNEON = false;
+  bool HasFPRegs = false;
+  bool HasFPRegs16 = false;
+  bool HasFPRegs64 = false;
 
   /// Versions of the VFP flags restricted to single precision, or to
   /// 16 d-registers, or both.
@@ -566,6 +569,9 @@ public:
   bool hasV8_5aOps() const { return HasV8_5aOps; }
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+  bool hasFPRegs() const { return HasFPRegs; }
+  bool hasFPRegs16() const { return HasFPRegs16; }
+  bool hasFPRegs64() const { return HasFPRegs64; }
 
   /// @{
   /// These functions are obsolete, please consider adding subtarget features
diff --git a/llvm/test/MC/ARM/fullfp16-neg.s b/llvm/test/MC/ARM/fullfp16-neg.s
index e7fba2a..7069cbc 100644
--- a/llvm/test/MC/ARM/fullfp16-neg.s
+++ b/llvm/test/MC/ARM/fullfp16-neg.s
@@ -165,25 +165,25 @@
   vldr.16 s2, [pc, #510]
   vldr.16 s3, [pc, #-510]
   vldr.16 s4, [r4, #-18]
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
 
 
   vstr.16 s1, [pc, #6]
   vstr.16 s2, [pc, #510]
   vstr.16 s3, [pc, #-510]
   vstr.16 s4, [r4, #-18]
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
 
   vmov.f16 s0, #1.0
 @ CHECK: instruction requires: full half-float
 
   vmov.f16 s1, r2
   vmov.f16 r3, s4
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
diff --git a/llvm/test/MC/ARM/mve-fp-registers.s b/llvm/test/MC/ARM/mve-fp-registers.s
new file mode 100644
index 0000000..aff7649
--- /dev/null
+++ b/llvm/test/MC/ARM/mve-fp-registers.s
@@ -0,0 +1,133 @@
+// Some simple operations on S, D and Q registers (loads, stores and moves) are
+// also available in MVE, even in the integer-only version. Some of these
+// instructions (operating on D or Q registers, or FP16 values) are only
+// available for certain targets.
+
+// Note that it's not always obvious which instructions are available, for
+// example several instructions operating on D registers are available for
+// single-precision only FPUs.
+
+// All of these instructions are rejected if no VFP or MVE features are
+// present.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding 2>%t < %s
+// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP32 --check-prefix=NOFP64
+
+// VFP and NEON implementations by default have FP32 and FP64, but not FP16.
+// The VFPv3 FP16 extension just added conversion instructions, which we don't
+// care about here.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+
+// The v8.2A FP16 extension added loads, stores and moves for FP16.
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
+
+// M-profile FPUs (e.g. Cortex-M4/M7/M33) do not have FP16 instructions, and
+// the FP64 instructions are optional. They are also limited to 16 D registers,
+// but we don't test that here.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32
+// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+
+vldmia  r0, {d0}
+# FP32: vldmia  r0, {d0}               @ encoding: [0x90,0xec,0x02,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstmia  r0, {d0}
+# FP32: vstmia  r0, {d0}                @ encoding: [0x80,0xec,0x02,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldmia  r0, {s0}
+# FP32: vldmia  r0, {s0}                @ encoding: [0x90,0xec,0x01,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstmia  r0, {s0}
+# FP32: vstmia  r0, {s0}                @ encoding: [0x80,0xec,0x01,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+fldmdbx r0!, {d0}
+# FP32: fldmdbx r0!, {d0}               @ encoding: [0x30,0xed,0x03,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+fstmiax r0, {d0}
+# FP32: fstmiax r0, {d0}                @ encoding: [0x80,0xec,0x03,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldr.16 s0, [r0]
+# FP16: vldr.16 s0, [r0]                @ encoding: [0x90,0xed,0x00,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vldr s0, [r0]
+# FP32: vldr    s0, [r0]                @ encoding: [0x90,0xed,0x00,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldr d0, [r0]
+# FP32: vldr    d0, [r0]                @ encoding: [0x90,0xed,0x00,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstr.16 s0, [r0]
+# FP16: vstr.16 s0, [r0]                @ encoding: [0x80,0xed,0x00,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vstr s0, [r0]
+# FP32: vstr    s0, [r0]                @ encoding: [0x80,0xed,0x00,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstr d0, [r0]
+# FP32: vstr    d0, [r0]                @ encoding: [0x80,0xed,0x00,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f16 r0, s0
+# FP16: vmov.f16        r0, s0          @ encoding: [0x10,0xee,0x10,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vmov.f16 s0, r0
+# FP16: vmov.f16        s0, r0          @ encoding: [0x00,0xee,0x10,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vmov s0, r0
+# FP32: vmov    s0, r0                  @ encoding: [0x00,0xee,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, s0
+# FP32: vmov    r0, s0                  @ encoding: [0x10,0xee,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, r1, d0
+# FP32: vmov    r0, r1, d0              @ encoding: [0x51,0xec,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov d0, r0, r1
+# FP32: vmov    d0, r0, r1              @ encoding: [0x41,0xec,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, r1, s0, s1
+# FP32: vmov    r0, r1, s0, s1          @ encoding: [0x51,0xec,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov s0, s1, r0, r1
+# FP32: vmov    s0, s1, r0, r1          @ encoding: [0x41,0xec,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f32 s0, s1
+# FP32: vmov.f32        s0, s1          @ encoding: [0xb0,0xee,0x60,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f64 d0, d1
+# FP64: vmov.f64        d0, d1          @ encoding: [0xb0,0xee,0x41,0x0b]
+# NOFP64: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 64-bit fp registers
+
+vmov.32 r0, d1[0]
+# FP32: vmov.32 r0, d1[0]               @ encoding: [0x11,0xee,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires: fp registers
+
+vmrs apsr_nzcv, fpscr
+# FP32: vmrs    APSR_nzcv, fpscr        @ encoding: [0xf1,0xee,0x10,0xfa]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
diff --git a/llvm/test/MC/ARM/single-precision-fp.s b/llvm/test/MC/ARM/single-precision-fp.s
index 9de4b10..1b541f8 100644
--- a/llvm/test/MC/ARM/single-precision-fp.s
+++ b/llvm/test/MC/ARM/single-precision-fp.s
@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-neon 2> %t > %t2
+@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-fpregs64,-neon 2> %t > %t2
 @ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
 @ RUN:     FileCheck %s < %t2
 
@@ -72,7 +72,7 @@
         @ FIXME: overlapping aliases and a probable TableGen indeterminacy mean
         @ that the actual reason can vary by platform.
         vmov.f64 d11, d10
-@ CHECK-ERRORS: instruction requires: NEON
+@ CHECK-ERRORS: instruction requires: 64-bit fp registers
 @ CHECK-ERRORS-NEXT: vmov.f64 d11, d10
 
         vcvt.f64.s32 d9, s8
diff --git a/llvm/test/MC/ARM/vmrs_vmsr.s b/llvm/test/MC/ARM/vmrs_vmsr.s
index edca917..9193ae1 100644
--- a/llvm/test/MC/ARM/vmrs_vmsr.s
+++ b/llvm/test/MC/ARM/vmrs_vmsr.s
@@ -103,10 +103,10 @@
 // ERROR-V8M: invalid operand for instruction
 // ERROR-V8M: invalid operand for instruction
 
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
@@ -172,9 +172,9 @@
 // ERROR-V8M: operand must be a register in range [r0, r14]
 
 // ERROR-NOVFP: invalid instruction
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: invalid instruction
 // ERROR-NOVFP: invalid instruction
-- 
2.7.4