From 963da5b1191f0ec084247252666d3f669fdaf71c Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker@arm.com>
Date: Fri, 29 Sep 2017 13:11:33 +0000
Subject: [PATCH] [ARM] v8.3-a complex number support

New instructions are added to AArch32 and AArch64 to aid
floating-point multiplication and addition of complex numbers, where
the complex numbers are packed in a vector register as a pair of
elements. The Imaginary part of the number is placed in the more
significant element, and the Real part of the number is placed in the
less significant element.

This patch adds assembler for the ARM target.

Differential Revision: https://reviews.llvm.org/D36789

llvm-svn: 314511
---
 llvm/lib/Target/ARM/ARMInstrFormats.td             |  75 +++++++++
 llvm/lib/Target/ARM/ARMInstrNEON.td                | 133 +++++++++++++++
 llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp     |  42 ++++-
 .../Target/ARM/Disassembler/ARMDisassembler.cpp    |  37 +++++
 llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp |   9 ++
 llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h   |   3 +
 llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h     |   1 +
 llvm/test/MC/ARM/neon-complex.s                    | 180 +++++++++++++++++++++
 llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt |  66 ++++++++
 .../MC/Disassembler/ARM/neon-complex-thumb.txt     |  66 ++++++++
 10 files changed, 610 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/ARM/neon-complex.s
 create mode 100644 llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt
 create mode 100644 llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 1bbe7f0..8bb6483 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -69,6 +69,7 @@ def NVExtFrm      : Format<39>;
 def NVMulSLFrm    : Format<40>;
 def NVTBLFrm      : Format<41>;
 def DPSoRegImmFrm  : Format<42>;
+def N3RegCplxFrm  : Format<43>;
 
 // Misc flags.
 
@@ -2513,6 +2514,80 @@ multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriori
 class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
   AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
 
+// Extension of NEON 3-vector data processing instructions in coprocessor 8
+// encoding space, introduced in ARMv8.3-A.
+class N3VCP8<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4,
+             dag oops, dag iops, InstrItinClass itin,
+             string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc,
+            dt, asm, cstr, pattern> {
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let DecoderNamespace = "VFPV8";
+  // These have the same encodings in ARM and Thumb2
+  let PostEncoderMethod = "";
+
+  let Inst{31-25} = 0b1111110;
+  let Inst{24-23} = op24_23;
+  let Inst{22}    = Vd{4};
+  let Inst{21-20} = op21_20;
+  let Inst{19-16} = Vn{3-0};
+  let Inst{15-12} = Vd{3-0};
+  let Inst{11-8}  = 0b1000;
+  let Inst{7}     = Vn{4};
+  let Inst{6}     = op6;
+  let Inst{5}     = Vm{4};
+  let Inst{4}     = op4;
+  let Inst{3-0}   = Vm{3-0};
+}
+
+// Extension of NEON 2-vector-and-scalar data processing instructions in
+// coprocessor 8 encoding space, introduced in ARMv8.3-A.
+class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4,
+             dag oops, dag iops, InstrItinClass itin,
+             string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc,
+            dt, asm, cstr, pattern> {
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let DecoderNamespace = "VFPV8";
+  // These have the same encodings in ARM and Thumb2
+  let PostEncoderMethod = "";
+
+  let Inst{31-24} = 0b11111110;
+  let Inst{23}    = op23;
+  let Inst{22}    = Vd{4};
+  let Inst{21-20} = op21_20;
+  let Inst{19-16} = Vn{3-0};
+  let Inst{15-12} = Vd{3-0};
+  let Inst{11-8}  = 0b1000;
+  let Inst{7}     = Vn{4};
+  let Inst{6}     = op6;
+  // Bit 5 set by sub-classes
+  let Inst{4}     = op4;
+  let Inst{3-0}   = Vm{3-0};
+}
+
+// Operand types for complex instructions
+class ComplexRotationOperand<int Angle, int Remainder, string Type>
+  : AsmOperandClass {
+  let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">";
+  let DiagnosticType = "InvalidComplexRotation" # Type;
+  let Name = "ComplexRotation" # Type;
+}
+def complexrotateop : Operand<i32> {
+  let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
+  let PrintMethod = "printComplexRotationOp<90, 0>";
+}
+def complexrotateopodd : Operand<i32> {
+  let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
+  let PrintMethod = "printComplexRotationOp<180, 90>";
+}
+
 // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
 def : TokenAlias<".s8", ".i8">;
 def : TokenAlias<".u8", ".i8">;
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 495d44f..cd67dde 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -108,6 +108,7 @@ def nImmSplatI64 : Operand<i32> {
 def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
 def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
 def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
+def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
 def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
   return ((uint64_t)Imm) < 8;
 }]> {
@@ -129,6 +130,13 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
   let PrintMethod = "printVectorIndex";
   let MIOperandInfo = (ops i32imm);
 }
+def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
+  return ((uint64_t)Imm) < 1;
+}]> {
+  let ParserMatchClass = VectorIndex64Operand;
+  let PrintMethod = "printVectorIndex";
+  let MIOperandInfo = (ops i32imm);
+}
 
 // Register list of one D register.
 def VecListOneDAsmOperand : AsmOperandClass {
@@ -4724,6 +4732,131 @@ def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;
 
 }  // HasDotProd
 
+// ARMv8.3 complex operations
+class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
+                            InstrItinClass itin, dag oops, dag iops,
+                            string opc, string dt, list<dag> pattern>
+  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
+           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
+  bits<2> rot;
+  let Inst{24-23} = rot;
+}
+
+class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
+                           InstrItinClass itin, dag oops, dag iops, string opc,
+                            string dt, list<dag> pattern>
+  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
+           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
+  bits<1> rot;
+  let Inst{24} = rot;
+}
+
+class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
+                                  dag oops, dag iops, string opc, string dt,
+                                  list<dag> pattern>
+  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
+               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
+  bits<2> rot;
+  bit lane;
+
+  let Inst{21-20} = rot;
+  let Inst{5} = lane;
+}
+
+class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
+                            dag oops, dag iops, string opc, string dt,
+                            list<dag> pattern>
+  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
+               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
+  bits<2> rot;
+  bit lane;
+
+  let Inst{21-20} = rot;
+  let Inst{5} = Vm{4};
+  // This is needed because the lane operand does not have any bits in the
+  // encoding (it only has one possible value), so we need to manually set it
+  // to it's default value.
+  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
+}
+
+multiclass N3VCP8ComplexTied<bit op21, bit op4,
+                       string OpcodeStr, SDPatternOperator Op> {
+  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
+  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
+              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
+              OpcodeStr, "f16", []>;
+  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
+              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
+              OpcodeStr, "f16", []>;
+  }
+  let Predicates = [HasNEON,HasV8_3a] in {
+  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
+              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
+              OpcodeStr, "f32", []>;
+  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
+              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
+              OpcodeStr, "f32", []>;
+  }
+}
+
+multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
+                       string OpcodeStr, SDPatternOperator Op> {
+  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
+  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
+              (outs DPR:$Vd),
+              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
+              OpcodeStr, "f16", []>;
+  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
+              (outs QPR:$Vd),
+              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
+              OpcodeStr, "f16", []>;
+  }
+  let Predicates = [HasNEON,HasV8_3a] in {
+  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
+              (outs DPR:$Vd),
+              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
+              OpcodeStr, "f32", []>;
+  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
+              (outs QPR:$Vd),
+              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
+              OpcodeStr, "f32", []>;
+  }
+}
+
+// These instructions index by pairs of lanes, so the VectorIndexes are twice
+// as wide as the data types.
+multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
+                                 SDPatternOperator Op> {
+  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
+  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
+                      (outs DPR:$Vd),
+                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+                      VectorIndex32:$lane, complexrotateop:$rot),
+                      OpcodeStr, "f16", []>;
+  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
+                      (outs QPR:$Vd),
+                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
+                      VectorIndex32:$lane, complexrotateop:$rot),
+                      OpcodeStr, "f16", []>;
+  }
+  let Predicates = [HasNEON,HasV8_3a] in {
+  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
+                      (outs DPR:$Vd),
+                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
+                      complexrotateop:$rot),
+                      OpcodeStr, "f32", []>;
+  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
+                      (outs QPR:$Vd),
+                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
+                      complexrotateop:$rot),
+                      OpcodeStr, "f32", []>;
+  }
+}
+
+defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
+defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
+defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
+
 // Vector Subtract Operations.
 
 //   VSUB     : Vector Subtract (integer and floating-point)
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 287ed20..b84a4e8 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1756,6 +1756,10 @@ public:
     if (Kind != k_VectorIndex) return false;
     return VectorIndex.Val < 2;
   }
+  bool isVectorIndex64() const {
+    if (Kind != k_VectorIndex) return false;
+    return VectorIndex.Val < 1;
+  }
 
   bool isNEONi8splat() const {
     if (!isImm()) return false;
@@ -1885,6 +1889,17 @@ public:
     return true;
   }
 
+  template<int64_t Angle, int64_t Remainder>
+  bool isComplexRotation() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+    uint64_t Value = CE->getValue();
+
+    return (Value % Angle == Remainder && Value <= 270);
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.  Null MCExpr = 0.
     if (!Expr)
@@ -2628,6 +2643,11 @@ public:
     Inst.addOperand(MCOperand::createImm(getVectorIndex()));
   }
 
+  void addVectorIndex64Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+  }
+
   void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     // The immediate encodes the type of constant as well as the value.
@@ -2740,6 +2760,18 @@ public:
     Inst.addOperand(MCOperand::createImm(Imm | 0x1e00));
   }
 
+  void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue() / 90));
+  }
+
+  void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180));
+  }
+
   void print(raw_ostream &OS) const override;
 
   static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) {
@@ -5432,7 +5464,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
       Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
       Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
       Mnemonic == "bxns"  || Mnemonic == "blxns" ||
-      Mnemonic == "vudot" || Mnemonic == "vsdot")
+      Mnemonic == "vudot" || Mnemonic == "vsdot" ||
+      Mnemonic == "vcmla" || Mnemonic == "vcadd")
     return Mnemonic;
 
   // First, split out any predication code. Ignore mnemonics we know aren't
@@ -5521,7 +5554,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
       Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
       (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
       Mnemonic == "vmovx" || Mnemonic == "vins" ||
-      Mnemonic == "vudot" || Mnemonic == "vsdot") {
+      Mnemonic == "vudot" || Mnemonic == "vsdot" ||
+      Mnemonic == "vcmla" || Mnemonic == "vcadd") {
     // These mnemonics are never predicable
     CanAcceptPredicationCode = false;
   } else if (!isThumb()) {
@@ -9155,6 +9189,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
         return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted");
     }
   }
+  case Match_InvalidComplexRotationEven:
+      return Error(IDLoc, "complex rotation must be 0, 90, 180 or 270");
+  case Match_InvalidComplexRotationOdd:
+      return Error(IDLoc, "complex rotation must be 90 or 270");
   }
 
   llvm_unreachable("Implement any new match types added!");
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e385498..737450d 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -322,6 +322,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
+                                                       unsigned Val,
+                                                       uint64_t Address,
+                                                       const void *Decoder);
 
 static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
                                uint64_t Address, const void *Decoder);
@@ -5215,6 +5219,39 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
   return S;
 }
 
+static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
+                                                       unsigned Insn,
+                                                       uint64_t Address,
+                                                       const void *Decoder) {
+  unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+  unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0);
+  Vn |= (fieldFromInstruction(Insn, 7, 1) << 4);
+  unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+  unsigned q = (fieldFromInstruction(Insn, 6, 1) << 0);
+  unsigned rotate = (fieldFromInstruction(Insn, 20, 2) << 0);
+
+  DecodeStatus S = MCDisassembler::Success;
+
+  auto DestRegDecoder = q ? DecodeQPRRegisterClass : DecodeDPRRegisterClass;
+
+  if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DestRegDecoder(Inst, Vn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  // The lane index does not have any bits in the encoding, because it can only
+  // be 0.
+  Inst.addOperand(MCOperand::createImm(0));
+  Inst.addOperand(MCOperand::createImm(rotate));
+
+  return S;
+}
+
 static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
diff --git a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index be6815a..4fc67a4 100644
--- a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1535,3 +1535,12 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
   printRegName(O, MI->getOperand(OpNum).getReg() + 6);
   O << "}";
 }
+
+template<int64_t Angle, int64_t Remainder>
+void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNo).getImm();
+  O << "#" << (Val * Angle) + Remainder;
+}
+
diff --git a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 86873a3..7dc3112 100644
--- a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -231,6 +231,9 @@ public:
                                   const MCSubtargetInfo &STI, raw_ostream &O);
   void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  template<int64_t Angle, int64_t Remainder>
+  void printComplexRotationOp(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 31f081b..17da82b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -343,6 +343,7 @@ namespace ARMII {
     NVExtFrm      = 39 << FormShift,
     NVMulSLFrm    = 40 << FormShift,
     NVTBLFrm      = 41 << FormShift,
+    N3RegCplxFrm  = 43 << FormShift,
 
     //===------------------------------------------------------------------===//
     // Misc flags.
diff --git a/llvm/test/MC/ARM/neon-complex.s b/llvm/test/MC/ARM/neon-complex.s
new file mode 100644
index 0000000..54176d8
--- /dev/null
+++ b/llvm/test/MC/ARM/neon-complex.s
@@ -0,0 +1,180 @@
+// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,+neon,+fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=THUMB --check-prefix=FP16-THUMB
+// RUN: FileCheck --check-prefix=STDERR <%t %s
+// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,+neon,+fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=ARM --check-prefix=FP16-ARM
+// RUN: FileCheck --check-prefix=STDERR <%t %s
+
+// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,+neon,-fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=THUMB
+// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-FP16-STDERR <%t %s
+// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,+neon,-fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=ARM
+// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-FP16-STDERR <%t %s
+
+// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,-neon,+fullfp16 -show-encoding < %s 2>%t
+// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-NEON-STDERR <%t %s
+// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,-neon,+fullfp16 -show-encoding < %s 2>%t
+// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-NEON-STDERR <%t %s
+
+// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=V82A
+// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=V82A
+
+/* ==== VCMLA vector ==== */
+
+// Valid types
+  vcmla.f16 d0, d1, d2, #0
+// FP16-ARM: vcmla.f16       d0, d1, d2, #0  @ encoding: [0x02,0x08,0x21,0xfc]
+// FP16-THUMB: vcmla.f16       d0, d1, d2, #0  @ encoding: [0x21,0xfc,0x02,0x08]
+// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
+// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f16 q0, q1, q2, #0
+// FP16-ARM: vcmla.f16       q0, q1, q2, #0  @ encoding: [0x44,0x08,0x22,0xfc]
+// FP16-THUMB: vcmla.f16       q0, q1, q2, #0  @ encoding: [0x22,0xfc,0x44,0x08]
+// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
+// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 d0, d1, d2, #0
+// ARM: vcmla.f32       d0, d1, d2, #0  @ encoding: [0x02,0x08,0x31,0xfc]
+// THUMB: vcmla.f32       d0, d1, d2, #0  @ encoding: [0x31,0xfc,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 q0, q1, q2, #0
+// ARM: vcmla.f32       q0, q1, q2, #0  @ encoding: [0x44,0x08,0x32,0xfc]
+// THUMB: vcmla.f32       q0, q1, q2, #0  @ encoding: [0x32,0xfc,0x44,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Valid rotations
+  vcmla.f32 d0, d1, d2, #90
+// ARM: vcmla.f32       d0, d1, d2, #90 @ encoding: [0x02,0x08,0xb1,0xfc]
+// THUMB: vcmla.f32       d0, d1, d2, #90 @ encoding: [0xb1,0xfc,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 d0, d1, d2, #180
+// ARM: vcmla.f32       d0, d1, d2, #180 @ encoding: [0x02,0x08,0x31,0xfd]
+// THUMB: vcmla.f32       d0, d1, d2, #180 @ encoding: [0x31,0xfd,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 d0, d1, d2, #270
+// ARM: vcmla.f32       d0, d1, d2, #270 @ encoding: [0x02,0x08,0xb1,0xfd]
+// THUMB: vcmla.f32       d0, d1, d2, #270 @ encoding: [0xb1,0xfd,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Invalid rotations
+  vcmla.f32 d0, d1, d2, #-90
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+  vcmla.f32 d0, d1, d2, #1
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+  vcmla.f32 d0, d1, d2, #360
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+
+/* ==== VCADD vector ==== */
+
+// Valid types
+  vcadd.f16 d0, d1, d2, #90
+// FP16-ARM: vcadd.f16       d0, d1, d2, #90 @ encoding: [0x02,0x08,0x81,0xfc]
+// FP16-THUMB: vcadd.f16       d0, d1, d2, #90 @ encoding: [0x81,0xfc,0x02,0x08]
+// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
+// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcadd.f16 q0, q1, q2, #90
+// FP16-ARM: vcadd.f16       q0, q1, q2, #90 @ encoding: [0x44,0x08,0x82,0xfc]
+// FP16-THUMB: vcadd.f16       q0, q1, q2, #90 @ encoding: [0x82,0xfc,0x44,0x08]
+// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
+// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcadd.f32 d0, d1, d2, #90
+// ARM: vcadd.f32       d0, d1, d2, #90 @ encoding: [0x02,0x08,0x91,0xfc]
+// THUMB: vcadd.f32       d0, d1, d2, #90 @ encoding: [0x91,0xfc,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcadd.f32 q0, q1, q2, #90
+// ARM: vcadd.f32       q0, q1, q2, #90 @ encoding: [0x44,0x08,0x92,0xfc]
+// THUMB: vcadd.f32       q0, q1, q2, #90 @ encoding: [0x92,0xfc,0x44,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Valid rotations
+  vcadd.f32 d0, d1, d2, #270
+// ARM: vcadd.f32       d0, d1, d2, #270 @ encoding: [0x02,0x08,0x91,0xfd]
+// THUMB: vcadd.f32       d0, d1, d2, #270 @ encoding: [0x91,0xfd,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Invalid rotations
+  vcadd.f32 d0, d1, d2, #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
+  vcadd.f32 d0, d1, d2, #180
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
+  vcadd.f32 d0, d1, d2, #-90
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
+  vcadd.f32 d0, d1, d2, #1
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
+  vcadd.f32 d0, d1, d2, #360
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270
+
+
+/* ==== VCMLA indexed ==== */
+
+// Valid types
+  vcmla.f16 d0, d1, d2[0], #0
+// FP16-ARM: vcmla.f16       d0, d1, d2[0], #0 @ encoding: [0x02,0x08,0x01,0xfe]
+// FP16-THUMB: vcmla.f16       d0, d1, d2[0], #0 @ encoding: [0x01,0xfe,0x02,0x08]
+// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
+// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f16 q0, q1, d2[0], #0
+// FP16-ARM: vcmla.f16       q0, q1, d2[0], #0 @ encoding: [0x42,0x08,0x02,0xfe]
+// FP16-THUMB: vcmla.f16       q0, q1, d2[0], #0 @ encoding: [0x02,0xfe,0x42,0x08]
+// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float
+// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 d0, d1, d2[0], #0
+// ARM: vcmla.f32       d0, d1, d2[0], #0 @ encoding: [0x02,0x08,0x81,0xfe]
+// THUMB: vcmla.f32       d0, d1, d2[0], #0 @ encoding: [0x81,0xfe,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 q0, q1, d2[0], #0
+// ARM: vcmla.f32       q0, q1, d2[0], #0 @ encoding: [0x42,0x08,0x82,0xfe]
+// THUMB: vcmla.f32       q0, q1, d2[0], #0 @ encoding: [0x82,0xfe,0x42,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Valid rotations
+  vcmla.f32 d0, d1, d2[0], #90
+// ARM: vcmla.f32       d0, d1, d2[0], #90 @ encoding: [0x02,0x08,0x91,0xfe]
+// THUMB: vcmla.f32       d0, d1, d2[0], #90 @ encoding: [0x91,0xfe,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 d0, d1, d2[0], #180
+// ARM: vcmla.f32       d0, d1, d2[0], #180 @ encoding: [0x02,0x08,0xa1,0xfe]
+// THUMB: vcmla.f32       d0, d1, d2[0], #180 @ encoding: [0xa1,0xfe,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+  vcmla.f32 d0, d1, d2[0], #270
+// ARM: vcmla.f32       d0, d1, d2[0], #270 @ encoding: [0x02,0x08,0xb1,0xfe]
+// THUMB: vcmla.f32       d0, d1, d2[0], #270 @ encoding: [0xb1,0xfe,0x02,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Invalid rotations
+  vcmla.f32 d0, d1, d2[0], #-90
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+  vcmla.f32 d0, d1, d2[0], #1
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+  vcmla.f32 d0, d1, d2[0], #360
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+
+// Valid indices
+  vcmla.f16 d0, d1, d2[1], #0
+// FP16-ARM: vcmla.f16       d0, d1, d2[1], #0 @ encoding: [0x22,0x08,0x01,0xfe]
+// FP16-THUMB: vcmla.f16       d0, d1, d2[1], #0 @ encoding: [0x01,0xfe,0x22,0x08]
+// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
+// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON
+
+// Invalid indices
+// FIXME: These error messages are emitted because the index operand is not
+// valid as a rotation, so they are a bit unintuitive. Can we do better?
+  vcmla.f16 d0, d1, d2[2], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
+  vcmla.f32 d0, d1, d2[1], #0
+// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270
diff --git a/llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt b/llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt
new file mode 100644
index 0000000..519298a
--- /dev/null
+++ b/llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt
@@ -0,0 +1,66 @@
+# RUN:     llvm-mc -triple armv8a -mattr=+v8.3a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
+# RUN: not llvm-mc -triple armv8a -mattr=+v8.2a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
+# RUN: not llvm-mc -triple armv8a -mattr=+v8.3a,-neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
+# RUN: not llvm-mc -triple armv8a -mattr=+v8.3a,+neon,-fullfp16 -disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
+# RUN: FileCheck %s < %t --check-prefix=MISSING-FP16
+
+[0x02,0x08,0x21,0xfc]
+# CHECK-FP16: vcmla.f16       d0, d1, d2, #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x44,0x08,0x22,0xfc]
+# CHECK-FP16: vcmla.f16       q0, q1, q2, #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x02,0x08,0x31,0xfc]
+# CHECK: vcmla.f32       d0, d1, d2, #0
+# MISSING: warning: invalid instruction encoding
+[0x44,0x08,0x32,0xfc]
+# CHECK: vcmla.f32       q0, q1, q2, #0
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0xb1,0xfc]
+# CHECK: vcmla.f32       d0, d1, d2, #90
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0x31,0xfd]
+# CHECK: vcmla.f32       d0, d1, d2, #180
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0xb1,0xfd]
+# CHECK: vcmla.f32       d0, d1, d2, #270
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0x81,0xfc]
+# CHECK-FP16: vcadd.f16       d0, d1, d2, #90
+# MISSING-FP16: warning: invalid instruction encoding
+[0x44,0x08,0x82,0xfc]
+# CHECK-FP16: vcadd.f16       q0, q1, q2, #90
+# MISSING-FP16: warning: invalid instruction encoding
+[0x02,0x08,0x91,0xfc]
+# CHECK: vcadd.f32       d0, d1, d2, #90
+# MISSING: warning: invalid instruction encoding
+[0x44,0x08,0x92,0xfc]
+# CHECK: vcadd.f32       q0, q1, q2, #90
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0x91,0xfd]
+# CHECK: vcadd.f32       d0, d1, d2, #270
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0x01,0xfe]
+# CHECK-FP16: vcmla.f16       d0, d1, d2[0], #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x42,0x08,0x02,0xfe]
+# CHECK-FP16: vcmla.f16       q0, q1, d2[0], #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x02,0x08,0x81,0xfe]
+# CHECK: vcmla.f32       d0, d1, d2[0], #0
+# MISSING: warning: invalid instruction encoding
+[0x42,0x08,0x82,0xfe]
+# CHECK: vcmla.f32       q0, q1, d2[0], #0
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0x91,0xfe]
+# CHECK: vcmla.f32       d0, d1, d2[0], #90
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0xa1,0xfe]
+# CHECK: vcmla.f32       d0, d1, d2[0], #180
+# MISSING: warning: invalid instruction encoding
+[0x02,0x08,0xb1,0xfe]
+# CHECK: vcmla.f32       d0, d1, d2[0], #270
+# MISSING: warning: invalid instruction encoding
+[0x22,0x08,0x01,0xfe]
+# CHECK-FP16: vcmla.f16       d0, d1, d2[1], #0
+# MISSING-FP16: warning: invalid instruction encoding
diff --git a/llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt b/llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt
new file mode 100644
index 0000000..260404f
--- /dev/null
+++ b/llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt
@@ -0,0 +1,66 @@
+# RUN:     llvm-mc -triple thumbv8a -mattr=+v8.3a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
+# RUN: not llvm-mc -triple thumbv8a -mattr=+v8.2a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
+# RUN: not llvm-mc -triple thumbv8a -mattr=+v8.3a,-neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16
+# RUN: not llvm-mc -triple thumbv8a -mattr=+v8.3a,+neon,-fullfp16 -disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
+# RUN: FileCheck %s < %t --check-prefix=MISSING-FP16
+
+[0x21,0xfc,0x02,0x08]
+# CHECK-FP16: vcmla.f16       d0, d1, d2, #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x22,0xfc,0x44,0x08]
+# CHECK-FP16: vcmla.f16       q0, q1, q2, #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x31,0xfc,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2, #0
+# MISSING: warning: invalid instruction encoding
+[0x32,0xfc,0x44,0x08]
+# CHECK: vcmla.f32       q0, q1, q2, #0
+# MISSING: warning: invalid instruction encoding
+[0xb1,0xfc,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2, #90
+# MISSING: warning: invalid instruction encoding
+[0x31,0xfd,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2, #180
+# MISSING: warning: invalid instruction encoding
+[0xb1,0xfd,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2, #270
+# MISSING: warning: invalid instruction encoding
+[0x81,0xfc,0x02,0x08]
+# CHECK-FP16: vcadd.f16       d0, d1, d2, #90
+# MISSING-FP16: warning: invalid instruction encoding
+[0x82,0xfc,0x44,0x08]
+# CHECK-FP16: vcadd.f16       q0, q1, q2, #90
+# MISSING-FP16: warning: invalid instruction encoding
+[0x91,0xfc,0x02,0x08]
+# CHECK: vcadd.f32       d0, d1, d2, #90
+# MISSING: warning: invalid instruction encoding
+[0x92,0xfc,0x44,0x08]
+# CHECK: vcadd.f32       q0, q1, q2, #90
+# MISSING: warning: invalid instruction encoding
+[0x91,0xfd,0x02,0x08]
+# CHECK: vcadd.f32       d0, d1, d2, #270
+# MISSING: warning: invalid instruction encoding
+[0x01,0xfe,0x02,0x08]
+# CHECK-FP16: vcmla.f16       d0, d1, d2[0], #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x02,0xfe,0x42,0x08]
+# CHECK-FP16: vcmla.f16       q0, q1, d2[0], #0
+# MISSING-FP16: warning: invalid instruction encoding
+[0x81,0xfe,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2[0], #0
+# MISSING: warning: invalid instruction encoding
+[0x82,0xfe,0x42,0x08]
+# CHECK: vcmla.f32       q0, q1, d2[0], #0
+# MISSING: warning: invalid instruction encoding
+[0x91,0xfe,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2[0], #90
+# MISSING: warning: invalid instruction encoding
+[0xa1,0xfe,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2[0], #180
+# MISSING: warning: invalid instruction encoding
+[0xb1,0xfe,0x02,0x08]
+# CHECK: vcmla.f32       d0, d1, d2[0], #270
+# MISSING: warning: invalid instruction encoding
+[0x01,0xfe,0x22,0x08]
+# CHECK-FP16: vcmla.f16       d0, d1, d2[1], #0
+# MISSING-FP16: warning: invalid instruction encoding
-- 
2.7.4