From 963da5b1191f0ec084247252666d3f669fdaf71c Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Fri, 29 Sep 2017 13:11:33 +0000 Subject: [PATCH] [ARM] v8.3-a complex number support New instructions are added to AArch32 and AArch64 to aid floating-point multiplication and addition of complex numbers, where the complex numbers are packed in a vector register as a pair of elements. The Imaginary part of the number is placed in the more significant element, and the Real part of the number is placed in the less significant element. This patch adds assembler for the ARM target. Differential Revision: https://reviews.llvm.org/D36789 llvm-svn: 314511 --- llvm/lib/Target/ARM/ARMInstrFormats.td | 75 +++++++++ llvm/lib/Target/ARM/ARMInstrNEON.td | 133 +++++++++++++++ llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 42 ++++- .../Target/ARM/Disassembler/ARMDisassembler.cpp | 37 +++++ llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 9 ++ llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3 + llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 1 + llvm/test/MC/ARM/neon-complex.s | 180 +++++++++++++++++++++ llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt | 66 ++++++++ .../MC/Disassembler/ARM/neon-complex-thumb.txt | 66 ++++++++ 10 files changed, 610 insertions(+), 2 deletions(-) create mode 100644 llvm/test/MC/ARM/neon-complex.s create mode 100644 llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt create mode 100644 llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index 1bbe7f0..8bb6483 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -69,6 +69,7 @@ def NVExtFrm : Format<39>; def NVMulSLFrm : Format<40>; def NVTBLFrm : Format<41>; def DPSoRegImmFrm : Format<42>; +def N3RegCplxFrm : Format<43>; // Misc flags. 
@@ -2513,6 +2514,80 @@ multiclass NEONDTAnyInstAlias : AsmPseudoInst, Requires<[HasNEON]>; +// Extension of NEON 3-vector data processing instructions in coprocessor 8 +// encoding space, introduced in ARMv8.3-A. +class N3VCP8 op24_23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-25} = 0b1111110; + let Inst{24-23} = op24_23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + let Inst{5} = Vm{4}; + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Extension of NEON 2-vector-and-scalar data processing instructions in +// coprocessor 8 encoding space, introduced in ARMv8.3-A. 
+class N3VLaneCP8 op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-24} = 0b11111110; + let Inst{23} = op23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + // Bit 5 set by sub-classes + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Operand types for complex instructions +class ComplexRotationOperand + : AsmOperandClass { + let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; + let DiagnosticType = "InvalidComplexRotation" # Type; + let Name = "ComplexRotation" # Type; +} +def complexrotateop : Operand { + let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">; + let PrintMethod = "printComplexRotationOp<90, 0>"; +} +def complexrotateopodd : Operand { + let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; + let PrintMethod = "printComplexRotationOp<180, 90>"; +} + // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. 
def : TokenAlias<".s8", ".i8">; def : TokenAlias<".u8", ".i8">; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 495d44f..cd67dde 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -108,6 +108,7 @@ def nImmSplatI64 : Operand { def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } +def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; } def VectorIndex8 : Operand, ImmLeaf { @@ -129,6 +130,13 @@ def VectorIndex32 : Operand, ImmLeaf, ImmLeaf { + let ParserMatchClass = VectorIndex64Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} // Register list of one D register. def VecListOneDAsmOperand : AsmOperandClass { @@ -4724,6 +4732,131 @@ def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>; } // HasDotProd +// ARMv8.3 complex operations +class BaseN3VCP8ComplexTied pattern> + : N3VCP8<{?,?}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ + bits<2> rot; + let Inst{24-23} = rot; +} + +class BaseN3VCP8ComplexOdd pattern> + : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { + bits<1> rot; + let Inst{24} = rot; +} + +class BaseN3VCP8ComplexTiedLane32 pattern> + : N3VLaneCP8 { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = lane; +} + +class BaseN3VCP8ComplexTiedLane64 pattern> + : N3VLaneCP8 { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = Vm{4}; + // This is needed because the lane operand does not have any bits in the + // encoding (it only has one possible value), so we need to manually set it + // to its default value. 
+ let DecoderMethod = "DecodeNEONComplexLane64Instruction"; +} + +multiclass N3VCP8ComplexTied { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexTied; + def v8f16 : BaseN3VCP8ComplexTied; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexTied; + def v4f32 : BaseN3VCP8ComplexTied; + } +} + +multiclass N3VCP8ComplexOdd { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexOdd; + def v8f16 : BaseN3VCP8ComplexOdd; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexOdd; + def v4f32 : BaseN3VCP8ComplexOdd; + } +} + +// These instructions index by pairs of lanes, so the VectorIndexes are twice +// as wide as the data types. +multiclass N3VCP8ComplexTiedLane { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16_indexed : BaseN3VCP8ComplexTiedLane32; + def v8f16_indexed : BaseN3VCP8ComplexTiedLane32; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32_indexed : BaseN3VCP8ComplexTiedLane64; + def v4f32_indexed : BaseN3VCP8ComplexTiedLane64; + } +} + +defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; +defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; +defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; + // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 287ed20..b84a4e8 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1756,6 +1756,10 @@ public: if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 2; } + bool isVectorIndex64() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 1; + } bool isNEONi8splat() const { if (!isImm()) return false; @@ -1885,6 +1889,17 @@ public: return true; } + template + bool isComplexRotation() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + uint64_t Value = CE->getValue(); + + return (Value % Angle == Remainder && Value <= 270); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -2628,6 +2643,11 @@ public: Inst.addOperand(MCOperand::createImm(getVectorIndex())); } + void addVectorIndex64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); + } + void addNEONi8splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. 
@@ -2740,6 +2760,18 @@ public: Inst.addOperand(MCOperand::createImm(Imm | 0x1e00)); } + void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() / 90)); + } + + void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180)); + } + void print(raw_ostream &OS) const override; static std::unique_ptr CreateITMask(unsigned Mask, SMLoc S) { @@ -5432,7 +5464,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || Mnemonic == "bxns" || Mnemonic == "blxns" || - Mnemonic == "vudot" || Mnemonic == "vsdot") + Mnemonic == "vudot" || Mnemonic == "vsdot" || + Mnemonic == "vcmla" || Mnemonic == "vcadd") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -5521,7 +5554,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || Mnemonic == "vmovx" || Mnemonic == "vins" || - Mnemonic == "vudot" || Mnemonic == "vsdot") { + Mnemonic == "vudot" || Mnemonic == "vsdot" || + Mnemonic == "vcmla" || Mnemonic == "vcadd") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -9155,6 +9189,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted"); } } + case Match_InvalidComplexRotationEven: + return Error(IDLoc, "complex rotation must be 0, 90, 180 or 270"); + case Match_InvalidComplexRotationOdd: + return Error(IDLoc, "complex rotation must be 90 or 270"); } llvm_unreachable("Implement any new match types added!"); diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e385498..737450d 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -322,6 +322,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); @@ -5215,6 +5219,39 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Vd = (fieldFromInstruction(Insn, 
12, 4) << 0); + Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); + unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0); + Vn |= (fieldFromInstruction(Insn, 7, 1) << 4); + unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); + unsigned q = (fieldFromInstruction(Insn, 6, 1) << 0); + unsigned rotate = (fieldFromInstruction(Insn, 20, 2) << 0); + + DecodeStatus S = MCDisassembler::Success; + + auto DestRegDecoder = q ? DecodeQPRRegisterClass : DecodeDPRRegisterClass; + + if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DestRegDecoder(Inst, Vn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder))) + return MCDisassembler::Fail; + // The lane index does not have any bits in the encoding, because it can only + // be 0. + Inst.addOperand(MCOperand::createImm(0)); + Inst.addOperand(MCOperand::createImm(rotate)); + + return S; +} + static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index be6815a..4fc67a4 100644 --- a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1535,3 +1535,12 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, printRegName(O, MI->getOperand(OpNum).getReg() + 6); O << "}"; } + +template +void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + O << "#" << (Val * Angle) + Remainder; +} + diff --git a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h 
b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 86873a3..7dc3112 100644 --- a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -231,6 +231,9 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template + void printComplexRotationOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 31f081b..17da82b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -343,6 +343,7 @@ namespace ARMII { NVExtFrm = 39 << FormShift, NVMulSLFrm = 40 << FormShift, NVTBLFrm = 41 << FormShift, + N3RegCplxFrm = 43 << FormShift, //===------------------------------------------------------------------===// // Misc flags. 
diff --git a/llvm/test/MC/ARM/neon-complex.s b/llvm/test/MC/ARM/neon-complex.s new file mode 100644 index 0000000..54176d8 --- /dev/null +++ b/llvm/test/MC/ARM/neon-complex.s @@ -0,0 +1,180 @@ +// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,+neon,+fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=THUMB --check-prefix=FP16-THUMB +// RUN: FileCheck --check-prefix=STDERR <%t %s +// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,+neon,+fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=ARM --check-prefix=FP16-ARM +// RUN: FileCheck --check-prefix=STDERR <%t %s + +// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,+neon,-fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=THUMB +// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-FP16-STDERR <%t %s +// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,+neon,-fullfp16 -show-encoding < %s 2>%t | FileCheck %s --check-prefix=ARM +// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-FP16-STDERR <%t %s + +// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.3a,-neon,+fullfp16 -show-encoding < %s 2>%t +// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-NEON-STDERR <%t %s +// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.3a,-neon,+fullfp16 -show-encoding < %s 2>%t +// RUN: FileCheck --check-prefix=STDERR --check-prefix=NO-NEON-STDERR <%t %s + +// RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=V82A +// RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=V82A + +/* ==== VCMLA vector ==== */ + +// Valid types + vcmla.f16 d0, d1, d2, #0 +// FP16-ARM: vcmla.f16 d0, d1, d2, #0 @ encoding: [0x02,0x08,0x21,0xfc] +// FP16-THUMB: vcmla.f16 d0, d1, d2, #0 @ encoding: [0x21,0xfc,0x02,0x08] +// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: 
full half-float +// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f16 q0, q1, q2, #0 +// FP16-ARM: vcmla.f16 q0, q1, q2, #0 @ encoding: [0x44,0x08,0x22,0xfc] +// FP16-THUMB: vcmla.f16 q0, q1, q2, #0 @ encoding: [0x22,0xfc,0x44,0x08] +// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float +// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 d0, d1, d2, #0 +// ARM: vcmla.f32 d0, d1, d2, #0 @ encoding: [0x02,0x08,0x31,0xfc] +// THUMB: vcmla.f32 d0, d1, d2, #0 @ encoding: [0x31,0xfc,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 q0, q1, q2, #0 +// ARM: vcmla.f32 q0, q1, q2, #0 @ encoding: [0x44,0x08,0x32,0xfc] +// THUMB: vcmla.f32 q0, q1, q2, #0 @ encoding: [0x32,0xfc,0x44,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + +// Valid rotations + vcmla.f32 d0, d1, d2, #90 +// ARM: vcmla.f32 d0, d1, d2, #90 @ encoding: [0x02,0x08,0xb1,0xfc] +// THUMB: vcmla.f32 d0, d1, d2, #90 @ encoding: [0xb1,0xfc,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 d0, d1, d2, #180 +// ARM: vcmla.f32 d0, d1, d2, #180 @ encoding: [0x02,0x08,0x31,0xfd] +// THUMB: vcmla.f32 d0, d1, d2, #180 @ encoding: [0x31,0xfd,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 d0, d1, d2, #270 +// ARM: vcmla.f32 d0, d1, d2, #270 @ encoding: [0x02,0x08,0xb1,0xfd] +// THUMB: 
vcmla.f32 d0, d1, d2, #270 @ encoding: [0xb1,0xfd,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + +// Invalid rotations + vcmla.f32 d0, d1, d2, #-90 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + vcmla.f32 d0, d1, d2, #1 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + vcmla.f32 d0, d1, d2, #360 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + +/* ==== VCADD vector ==== */ + +// Valid types + vcadd.f16 d0, d1, d2, #90 +// FP16-ARM: vcadd.f16 d0, d1, d2, #90 @ encoding: [0x02,0x08,0x81,0xfc] +// FP16-THUMB: vcadd.f16 d0, d1, d2, #90 @ encoding: [0x81,0xfc,0x02,0x08] +// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float +// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcadd.f16 q0, q1, q2, #90 +// FP16-ARM: vcadd.f16 q0, q1, q2, #90 @ encoding: [0x44,0x08,0x82,0xfc] +// FP16-THUMB: vcadd.f16 q0, q1, q2, #90 @ encoding: [0x82,0xfc,0x44,0x08] +// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float +// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcadd.f32 d0, d1, d2, #90 +// ARM: vcadd.f32 d0, d1, d2, #90 @ encoding: [0x02,0x08,0x91,0xfc] +// THUMB: vcadd.f32 d0, d1, d2, #90 @ encoding: [0x91,0xfc,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + vcadd.f32 q0, q1, q2, #90 +// ARM: vcadd.f32 q0, q1, q2, #90 @ encoding: [0x44,0x08,0x92,0xfc] +// THUMB: vcadd.f32 q0, q1, q2, #90 @ encoding: [0x92,0xfc,0x44,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: 
error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + +// Valid rotations + vcadd.f32 d0, d1, d2, #270 +// ARM: vcadd.f32 d0, d1, d2, #270 @ encoding: [0x02,0x08,0x91,0xfd] +// THUMB: vcadd.f32 d0, d1, d2, #270 @ encoding: [0x91,0xfd,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + +// Invalid rotations + vcadd.f32 d0, d1, d2, #0 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270 + vcadd.f32 d0, d1, d2, #180 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270 + vcadd.f32 d0, d1, d2, #-90 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270 + vcadd.f32 d0, d1, d2, #1 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270 + vcadd.f32 d0, d1, d2, #360 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 90 or 270 + + +/* ==== VCMLA indexed ==== */ + +// Valid types + vcmla.f16 d0, d1, d2[0], #0 +// FP16-ARM: vcmla.f16 d0, d1, d2[0], #0 @ encoding: [0x02,0x08,0x01,0xfe] +// FP16-THUMB: vcmla.f16 d0, d1, d2[0], #0 @ encoding: [0x01,0xfe,0x02,0x08] +// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float +// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f16 q0, q1, d2[0], #0 +// FP16-ARM: vcmla.f16 q0, q1, d2[0], #0 @ encoding: [0x42,0x08,0x02,0xfe] +// FP16-THUMB: vcmla.f16 q0, q1, d2[0], #0 @ encoding: [0x02,0xfe,0x42,0x08] +// NO-FP16-STDERR: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: full half-float +// V82A: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 d0, d1, d2[0], #0 +// ARM: vcmla.f32 d0, d1, d2[0], #0 @ 
encoding: [0x02,0x08,0x81,0xfe] +// THUMB: vcmla.f32 d0, d1, d2[0], #0 @ encoding: [0x81,0xfe,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 q0, q1, d2[0], #0 +// ARM: vcmla.f32 q0, q1, d2[0], #0 @ encoding: [0x42,0x08,0x82,0xfe] +// THUMB: vcmla.f32 q0, q1, d2[0], #0 @ encoding: [0x82,0xfe,0x42,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-5]]:{{[0-9]*}}: error: instruction requires: NEON + +// Valid rotations + vcmla.f32 d0, d1, d2[0], #90 +// ARM: vcmla.f32 d0, d1, d2[0], #90 @ encoding: [0x02,0x08,0x91,0xfe] +// THUMB: vcmla.f32 d0, d1, d2[0], #90 @ encoding: [0x91,0xfe,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 d0, d1, d2[0], #180 +// ARM: vcmla.f32 d0, d1, d2[0], #180 @ encoding: [0x02,0x08,0xa1,0xfe] +// THUMB: vcmla.f32 d0, d1, d2[0], #180 @ encoding: [0xa1,0xfe,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + vcmla.f32 d0, d1, d2[0], #270 +// ARM: vcmla.f32 d0, d1, d2[0], #270 @ encoding: [0x02,0x08,0xb1,0xfe] +// THUMB: vcmla.f32 d0, d1, d2[0], #270 @ encoding: [0xb1,0xfe,0x02,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + +// Invalid rotations + vcmla.f32 d0, d1, d2[0], #-90 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + vcmla.f32 d0, d1, d2[0], #1 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + vcmla.f32 d0, d1, d2[0], #360 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + +// Valid indices + 
vcmla.f16 d0, d1, d2[1], #0 +// FP16-ARM: vcmla.f16 d0, d1, d2[1], #0 @ encoding: [0x22,0x08,0x01,0xfe] +// FP16-THUMB: vcmla.f16 d0, d1, d2[1], #0 @ encoding: [0x01,0xfe,0x22,0x08] +// V82A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a +// NO-NEON_STDERR: :[[@LINE-4]]:{{[0-9]*}}: error: instruction requires: NEON + +// Invalid indices +// FIXME: These error messages are emitted because the index operand is not +// valid as a rotation, so they are a bit unintuitive. Can we do better? + vcmla.f16 d0, d1, d2[2], #0 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 + vcmla.f32 d0, d1, d2[1], #0 +// STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: complex rotation must be 0, 90, 180 or 270 diff --git a/llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt b/llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt new file mode 100644 index 0000000..519298a --- /dev/null +++ b/llvm/test/MC/Disassembler/ARM/neon-complex-arm.txt @@ -0,0 +1,66 @@ +# RUN: llvm-mc -triple armv8a -mattr=+v8.3a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16 +# RUN: not llvm-mc -triple armv8a -mattr=+v8.2a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16 +# RUN: not llvm-mc -triple armv8a -mattr=+v8.3a,-neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16 +# RUN: not llvm-mc -triple armv8a -mattr=+v8.3a,+neon,-fullfp16 -disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK +# RUN: FileCheck %s < %t --check-prefix=MISSING-FP16 + +[0x02,0x08,0x21,0xfc] +# CHECK-FP16: vcmla.f16 d0, d1, d2, #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x44,0x08,0x22,0xfc] +# CHECK-FP16: vcmla.f16 q0, q1, q2, #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x02,0x08,0x31,0xfc] +# CHECK: vcmla.f32 d0, d1, d2, #0 +# MISSING: warning: invalid instruction encoding +[0x44,0x08,0x32,0xfc] +# 
CHECK: vcmla.f32 q0, q1, q2, #0 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0xb1,0xfc] +# CHECK: vcmla.f32 d0, d1, d2, #90 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0x31,0xfd] +# CHECK: vcmla.f32 d0, d1, d2, #180 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0xb1,0xfd] +# CHECK: vcmla.f32 d0, d1, d2, #270 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0x81,0xfc] +# CHECK-FP16: vcadd.f16 d0, d1, d2, #90 +# MISSING-FP16: warning: invalid instruction encoding +[0x44,0x08,0x82,0xfc] +# CHECK-FP16: vcadd.f16 q0, q1, q2, #90 +# MISSING-FP16: warning: invalid instruction encoding +[0x02,0x08,0x91,0xfc] +# CHECK: vcadd.f32 d0, d1, d2, #90 +# MISSING: warning: invalid instruction encoding +[0x44,0x08,0x92,0xfc] +# CHECK: vcadd.f32 q0, q1, q2, #90 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0x91,0xfd] +# CHECK: vcadd.f32 d0, d1, d2, #270 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0x01,0xfe] +# CHECK-FP16: vcmla.f16 d0, d1, d2[0], #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x42,0x08,0x02,0xfe] +# CHECK-FP16: vcmla.f16 q0, q1, d2[0], #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x02,0x08,0x81,0xfe] +# CHECK: vcmla.f32 d0, d1, d2[0], #0 +# MISSING: warning: invalid instruction encoding +[0x42,0x08,0x82,0xfe] +# CHECK: vcmla.f32 q0, q1, d2[0], #0 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0x91,0xfe] +# CHECK: vcmla.f32 d0, d1, d2[0], #90 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0xa1,0xfe] +# CHECK: vcmla.f32 d0, d1, d2[0], #180 +# MISSING: warning: invalid instruction encoding +[0x02,0x08,0xb1,0xfe] +# CHECK: vcmla.f32 d0, d1, d2[0], #270 +# MISSING: warning: invalid instruction encoding +[0x22,0x08,0x01,0xfe] +# CHECK-FP16: vcmla.f16 d0, d1, d2[1], #0 +# MISSING-FP16: warning: invalid instruction encoding diff --git a/llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt 
b/llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt new file mode 100644 index 0000000..260404f --- /dev/null +++ b/llvm/test/MC/Disassembler/ARM/neon-complex-thumb.txt @@ -0,0 +1,66 @@ +# RUN: llvm-mc -triple thumbv8a -mattr=+v8.3a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16 +# RUN: not llvm-mc -triple thumbv8a -mattr=+v8.2a,+neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16 +# RUN: not llvm-mc -triple thumbv8a -mattr=+v8.3a,-neon,+fullfp16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=MISSING --check-prefix=MISSING-FP16 +# RUN: not llvm-mc -triple thumbv8a -mattr=+v8.3a,+neon,-fullfp16 -disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK +# RUN: FileCheck %s < %t --check-prefix=MISSING-FP16 + +[0x21,0xfc,0x02,0x08] +# CHECK-FP16: vcmla.f16 d0, d1, d2, #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x22,0xfc,0x44,0x08] +# CHECK-FP16: vcmla.f16 q0, q1, q2, #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x31,0xfc,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2, #0 +# MISSING: warning: invalid instruction encoding +[0x32,0xfc,0x44,0x08] +# CHECK: vcmla.f32 q0, q1, q2, #0 +# MISSING: warning: invalid instruction encoding +[0xb1,0xfc,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2, #90 +# MISSING: warning: invalid instruction encoding +[0x31,0xfd,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2, #180 +# MISSING: warning: invalid instruction encoding +[0xb1,0xfd,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2, #270 +# MISSING: warning: invalid instruction encoding +[0x81,0xfc,0x02,0x08] +# CHECK-FP16: vcadd.f16 d0, d1, d2, #90 +# MISSING-FP16: warning: invalid instruction encoding +[0x82,0xfc,0x44,0x08] +# CHECK-FP16: vcadd.f16 q0, q1, q2, #90 +# MISSING-FP16: warning: invalid instruction encoding +[0x91,0xfc,0x02,0x08] +# CHECK: vcadd.f32 d0, d1, d2, #90 +# MISSING: warning: invalid instruction encoding +[0x92,0xfc,0x44,0x08] +# CHECK: 
vcadd.f32 q0, q1, q2, #90 +# MISSING: warning: invalid instruction encoding +[0x91,0xfd,0x02,0x08] +# CHECK: vcadd.f32 d0, d1, d2, #270 +# MISSING: warning: invalid instruction encoding +[0x01,0xfe,0x02,0x08] +# CHECK-FP16: vcmla.f16 d0, d1, d2[0], #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x02,0xfe,0x42,0x08] +# CHECK-FP16: vcmla.f16 q0, q1, d2[0], #0 +# MISSING-FP16: warning: invalid instruction encoding +[0x81,0xfe,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2[0], #0 +# MISSING: warning: invalid instruction encoding +[0x82,0xfe,0x42,0x08] +# CHECK: vcmla.f32 q0, q1, d2[0], #0 +# MISSING: warning: invalid instruction encoding +[0x91,0xfe,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2[0], #90 +# MISSING: warning: invalid instruction encoding +[0xa1,0xfe,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2[0], #180 +# MISSING: warning: invalid instruction encoding +[0xb1,0xfe,0x02,0x08] +# CHECK: vcmla.f32 d0, d1, d2[0], #270 +# MISSING: warning: invalid instruction encoding +[0x01,0xfe,0x22,0x08] +# CHECK-FP16: vcmla.f16 d0, d1, d2[1], #0 +# MISSING-FP16: warning: invalid instruction encoding -- 2.7.4