Add support for VCMLA and VCADD advanced SIMD complex number instructions.
The command line option is -march=armv8.3-a+fp16+simd for enabling all
instructions.
In arm-dis.c the formatting syntax was abused a bit to select between
0 vs 90 or 180 vs 270 or 90 vs 270 based on a bit value instead of
duplicating entries in the opcode table.
gas/
* config/tc-arm.c (do_vcmla, do_vcadd): Define.
(neon_scalar_for_vcmla): Define.
(enum operand_parse_code): Add OP_IROT1 and OP_IROT2.
(NEON_ENC_TAB): Add DDSI and QQSI variants.
(insns): Add vcmla and vcadd.
* testsuite/gas/arm/armv8_3-a-simd.d: New.
* testsuite/gas/arm/armv8_3-a-simd.s: New.
* testsuite/gas/arm/armv8_3-a-simd-bad.d: New.
* testsuite/gas/arm/armv8_3-a-simd-bad.l: New.
* testsuite/gas/arm/armv8_3-a-simd-bad.s: New.
opcodes/
* arm-dis.c (coprocessor_opcodes): Add vcmla and vcadd.
(print_insn_coprocessor): Add 'V' format for neon D or Q regs.
+2016-12-05 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
+ * config/tc-arm.c (do_vcmla, do_vcadd): Define.
+ (neon_scalar_for_vcmla): Define.
+ (enum operand_parse_code): Add OP_IROT1 and OP_IROT2.
+ (NEON_ENC_TAB): Add DDSI and QQSI variants.
+ (insns): Add vcmla and vcadd.
+ * testsuite/gas/arm/armv8_3-a-simd.d: New.
+ * testsuite/gas/arm/armv8_3-a-simd.s: New.
+ * testsuite/gas/arm/armv8_3-a-simd-bad.d: New.
+ * testsuite/gas/arm/armv8_3-a-simd-bad.l: New.
+ * testsuite/gas/arm/armv8_3-a-simd-bad.s: New.
+
2016-12-05 Claudiu Zissulescu <claziss@synopsys.com>
* testsuite/gas/arc/textauxregister-1.d: New file.
OP_EXPi, /* same, with optional immediate prefix */
OP_EXPr, /* same, with optional relocation suffix */
OP_HALF, /* 0 .. 65535 or low/high reloc. */
+ OP_IROT1, /* VCADD rotate immediate: 90, 270. */
+ OP_IROT2, /* VCMLA rotate immediate: 0, 90, 180, 270. */
OP_CPSF, /* CPS flags */
OP_ENDI, /* Endianness specifier */
X(3, (D, Q, S), MIXED), \
X(4, (D, D, D, I), DOUBLE), \
X(4, (Q, Q, Q, I), QUAD), \
+ X(4, (D, D, S, I), DOUBLE), \
+ X(4, (Q, Q, S, I), QUAD), \
X(2, (F, F), SINGLE), \
X(3, (F, F, F), SINGLE), \
X(2, (F, I), SINGLE), \
do_vrint_1 (neon_cvt_mode_m);
}
+/* Turn the indexed scalar operand OPND of a VCMLA instruction into the
+   register number to be encoded, for elements of ELSIZE bits.
+
+   For 16-bit elements only registers 0-15 with index 0 or 1 are accepted
+   and the index is returned in bit 4 of the result.  For 32-bit elements
+   only index 0 is accepted and the register number is returned unchanged.
+   Anything else reports "scalar out of range" and yields 0.  */
+static unsigned
+neon_scalar_for_vcmla (unsigned opnd, unsigned elsize)
+{
+  unsigned reg = NEON_SCALAR_REG (opnd);
+  unsigned index = NEON_SCALAR_INDEX (opnd);
+
+  switch (elsize)
+    {
+    case 16:
+      if (index < 2 && reg < 16)
+        return (index << 4) | reg;
+      break;
+
+    case 32:
+      if (index == 0)
+        return reg;
+      break;
+
+    default:
+      break;
+    }
+
+  first_error (_("scalar out of range"));
+  return 0;
+}
+
+/* Encode VCMLA.  Operands 0 and 1 are D or Q registers; operand 2 is either
+   a register of the same shape or an indexed scalar; the trailing immediate
+   is the rotation and must be the constant 0, 90, 180 or 270.  Only the
+   F16 and F32 element types are accepted.  */
+static void
+do_vcmla (void)
+{
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+              _(BAD_FPU));
+  constraint (inst.reloc.exp.X_op != O_constant, _("expression too complex"));
+
+  /* Validate the rotation at the full width of the parsed constant.
+     Narrowing it to unsigned first would let an oversized value that is
+     congruent to a valid rotation modulo 2^32 slip through wherever
+     offsetT is wider than unsigned.  */
+  offsetT rot_imm = inst.reloc.exp.X_add_number;
+  constraint (rot_imm != 0 && rot_imm != 90
+              && rot_imm != 180 && rot_imm != 270,
+              _("immediate out of range"));
+  /* Two-bit rotation field: 0, 1, 2, 3 for 0, 90, 180, 270 degrees.  */
+  unsigned rot = rot_imm / 90;
+
+  if (inst.operands[2].isscalar)
+    {
+      /* By-element form: the third operand is an indexed scalar.  */
+      enum neon_shape rs = neon_select_shape (NS_DDSI, NS_QQSI, NS_NULL);
+      unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
+                                       N_KEY | N_F16 | N_F32).size;
+      unsigned m = neon_scalar_for_vcmla (inst.operands[2].reg, size);
+      inst.is_neon = 1;
+      inst.instruction = 0xfe000800;
+      inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+      inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+      inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+      inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+      inst.instruction |= LOW4 (m);
+      inst.instruction |= HI1 (m) << 5;
+      inst.instruction |= neon_quad (rs) << 6;
+      /* Rotation lives in bits 20-21 and the F32 flag in bit 23.  */
+      inst.instruction |= rot << 20;
+      inst.instruction |= (size == 32) << 23;
+    }
+  else
+    {
+      /* Vector form: all three operands are registers of the same shape.  */
+      enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
+      unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
+                                       N_KEY | N_F16 | N_F32).size;
+      neon_three_same (neon_quad (rs), 0, -1);
+      inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup.  */
+      inst.instruction |= 0xfc200800;
+      /* Rotation lives in bits 23-24 and the F32 flag in bit 20.  */
+      inst.instruction |= rot << 23;
+      inst.instruction |= (size == 32) << 20;
+    }
+}
+
+/* Encode VCADD.  All three register operands are D or Q registers of the
+   same shape; the trailing immediate is the rotation and must be the
+   constant 90 or 270.  Only the F16 and F32 element types are accepted.  */
+static void
+do_vcadd (void)
+{
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+              _(BAD_FPU));
+  constraint (inst.reloc.exp.X_op != O_constant, _("expression too complex"));
+
+  /* Validate the rotation at the full width of the parsed constant.
+     Narrowing it to unsigned first would let an oversized value that is
+     congruent to a valid rotation modulo 2^32 slip through wherever
+     offsetT is wider than unsigned.  */
+  offsetT rot_imm = inst.reloc.exp.X_add_number;
+  constraint (rot_imm != 90 && rot_imm != 270, _("immediate out of range"));
+
+  enum neon_shape rs = neon_select_shape (NS_DDDI, NS_QQQI, NS_NULL);
+  unsigned size = neon_check_type (3, rs, N_EQK, N_EQK,
+                                   N_KEY | N_F16 | N_F32).size;
+  neon_three_same (neon_quad (rs), 0, -1);
+  inst.instruction &= 0x00ffffff; /* Undo neon_dp_fixup.  */
+  inst.instruction |= 0xfc800800;
+  /* Bit 24 selects a rotation of 270 rather than 90; bit 20 flags F32.  */
+  inst.instruction |= (rot_imm == 270) << 24;
+  inst.instruction |= (size == 32) << 20;
+}
+
/* Crypto v1 instructions. */
static void
do_crypto_2op_1 (unsigned elttype, int op)
#undef THUMB_VARIANT
#define THUMB_VARIANT & arm_ext_v8_3
NCE (vjcvt, eb90bc0, 2, (RVS, RVD), vjcvt),
+ NUF (vcmla, 0, 4, (RNDQ, RNDQ, RNDQ_RNSC, EXPi), vcmla),
+ NUF (vcadd, 0, 4, (RNDQ, RNDQ, RNDQ, EXPi), vcadd),
#undef ARM_VARIANT
#define ARM_VARIANT & fpu_fpa_ext_v1 /* Core FPA instruction set (V1). */
--- /dev/null
+#as: -march=armv8.3-a+fp16+simd
+#error-output: armv8_3-a-simd-bad.l
--- /dev/null
+[^:]+: Assembler messages:
+[^:]+:6: Error: operand types can't be inferred -- `vcadd d0,d1,d2,#90'
+[^:]+:7: Error: immediate out of range -- `vcadd\.f32 q0,q1,q2,#0'
+[^:]+:8: Error: immediate out of range -- `vcadd\.f32 q0,q1,q2,#180'
+[^:]+:9: Error: Neon double or quad precision register expected -- `vcadd\.f16 s0,s1,s2,#90'
+[^:]+:10: Error: bad type in Neon instruction -- `vcadd\.f64 d0,d1,d2,#90'
+[^:]+:11: Error: bad type in Neon instruction -- `vcadd\.f64 q0,q1,q2,#90'
+[^:]+:13: Error: operand types can't be inferred -- `vcmla d0,d1,d2,#90'
+[^:]+:14: Error: immediate out of range -- `vcmla\.f32 q0,q1,q2,#-90'
+[^:]+:15: Error: immediate out of range -- `vcmla\.f32 q0,q1,q2,#120'
+[^:]+:16: Error: immediate out of range -- `vcmla\.f32 q0,q1,q2,#360'
+[^:]+:17: Error: Neon double or quad precision register expected -- `vcmla\.f16 s0,s1,s2,#90'
+[^:]+:18: Error: bad type in Neon instruction -- `vcmla\.f64 d0,d1,d2,#90'
+[^:]+:19: Error: bad type in Neon instruction -- `vcmla\.f64 q0,q1,q2,#90'
+[^:]+:21: Error: only D registers may be indexed -- `vcmla\.f16 q0,q1,q2\[0\],#90'
+[^:]+:22: Error: only D registers may be indexed -- `vcmla\.f32 q0,q1,q2\[0\],#90'
+[^:]+:23: Error: scalar out of range -- `vcmla\.f16 d0,d1,d2\[2\],#90'
+[^:]+:24: Error: scalar out of range -- `vcmla\.f16 q0,q1,d2\[2\],#90'
+[^:]+:25: Error: scalar out of range -- `vcmla\.f16 q0,q1,d16\[1\],#90'
+[^:]+:26: Error: scalar out of range -- `vcmla\.f32 q0,q1,d2\[1\],#90'
+[^:]+:31: Error: operand types can't be inferred -- `vcadd d0,d1,d2,#90'
+[^:]+:32: Error: immediate out of range -- `vcadd\.f32 q0,q1,q2,#0'
+[^:]+:33: Error: immediate out of range -- `vcadd\.f32 q0,q1,q2,#180'
+[^:]+:34: Error: Neon double or quad precision register expected -- `vcadd\.f16 s0,s1,s2,#90'
+[^:]+:35: Error: bad type in Neon instruction -- `vcadd\.f64 d0,d1,d2,#90'
+[^:]+:36: Error: bad type in Neon instruction -- `vcadd\.f64 q0,q1,q2,#90'
+[^:]+:38: Error: operand types can't be inferred -- `vcmla d0,d1,d2,#90'
+[^:]+:39: Error: immediate out of range -- `vcmla\.f32 q0,q1,q2,#-90'
+[^:]+:40: Error: immediate out of range -- `vcmla\.f32 q0,q1,q2,#120'
+[^:]+:41: Error: immediate out of range -- `vcmla\.f32 q0,q1,q2,#360'
+[^:]+:42: Error: Neon double or quad precision register expected -- `vcmla\.f16 s0,s1,s2,#90'
+[^:]+:43: Error: bad type in Neon instruction -- `vcmla\.f64 d0,d1,d2,#90'
+[^:]+:44: Error: bad type in Neon instruction -- `vcmla\.f64 q0,q1,q2,#90'
+[^:]+:46: Error: only D registers may be indexed -- `vcmla\.f16 q0,q1,q2\[0\],#90'
+[^:]+:47: Error: only D registers may be indexed -- `vcmla\.f32 q0,q1,q2\[0\],#90'
+[^:]+:48: Error: scalar out of range -- `vcmla\.f16 d0,d1,d2\[2\],#90'
+[^:]+:49: Error: scalar out of range -- `vcmla\.f16 q0,q1,d2\[2\],#90'
+[^:]+:50: Error: scalar out of range -- `vcmla\.f16 q0,q1,d16\[1\],#90'
+[^:]+:51: Error: scalar out of range -- `vcmla\.f32 q0,q1,d2\[1\],#90'
--- /dev/null
+ .text
+
+A1:
+ .arm
+
+ vcadd d0,d1,d2,#90
+ vcadd.f32 q0,q1,q2,#0
+ vcadd.f32 q0,q1,q2,#180
+ vcadd.f16 s0,s1,s2,#90
+ vcadd.f64 d0,d1,d2,#90
+ vcadd.f64 q0,q1,q2,#90
+
+ vcmla d0,d1,d2,#90
+ vcmla.f32 q0,q1,q2,#-90
+ vcmla.f32 q0,q1,q2,#120
+ vcmla.f32 q0,q1,q2,#360
+ vcmla.f16 s0,s1,s2,#90
+ vcmla.f64 d0,d1,d2,#90
+ vcmla.f64 q0,q1,q2,#90
+
+ vcmla.f16 q0,q1,q2[0],#90
+ vcmla.f32 q0,q1,q2[0],#90
+ vcmla.f16 d0,d1,d2[2],#90
+ vcmla.f16 q0,q1,d2[2],#90
+ vcmla.f16 q0,q1,d16[1],#90
+ vcmla.f32 q0,q1,d2[1],#90
+
+T1:
+ .thumb
+
+ vcadd d0,d1,d2,#90
+ vcadd.f32 q0,q1,q2,#0
+ vcadd.f32 q0,q1,q2,#180
+ vcadd.f16 s0,s1,s2,#90
+ vcadd.f64 d0,d1,d2,#90
+ vcadd.f64 q0,q1,q2,#90
+
+ vcmla d0,d1,d2,#90
+ vcmla.f32 q0,q1,q2,#-90
+ vcmla.f32 q0,q1,q2,#120
+ vcmla.f32 q0,q1,q2,#360
+ vcmla.f16 s0,s1,s2,#90
+ vcmla.f64 d0,d1,d2,#90
+ vcmla.f64 q0,q1,q2,#90
+
+ vcmla.f16 q0,q1,q2[0],#90
+ vcmla.f32 q0,q1,q2[0],#90
+ vcmla.f16 d0,d1,d2[2],#90
+ vcmla.f16 q0,q1,d2[2],#90
+ vcmla.f16 q0,q1,d16[1],#90
+ vcmla.f32 q0,q1,d2[1],#90
--- /dev/null
+#as: -march=armv8.3-a+fp16+simd
+#objdump: -dr
+#skip: *-*-pe *-wince-* *-*-coff
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+[0-9a-f]+ <.*>:
+ +[0-9a-f]+: fc942846 vcadd.f32 q1, q2, q3, #90
+ +[0-9a-f]+: fd942846 vcadd.f32 q1, q2, q3, #270
+ +[0-9a-f]+: fcc658a7 vcadd.f16 d21, d22, d23, #90
+ +[0-9a-f]+: fc842846 vcadd.f16 q1, q2, q3, #90
+ +[0-9a-f]+: fcd658a7 vcadd.f32 d21, d22, d23, #90
+ +[0-9a-f]+: fc342846 vcmla.f32 q1, q2, q3, #0
+ +[0-9a-f]+: fcb42846 vcmla.f32 q1, q2, q3, #90
+ +[0-9a-f]+: fd342846 vcmla.f32 q1, q2, q3, #180
+ +[0-9a-f]+: fdb42846 vcmla.f32 q1, q2, q3, #270
+ +[0-9a-f]+: fce658a7 vcmla.f16 d21, d22, d23, #90
+ +[0-9a-f]+: fca42846 vcmla.f16 q1, q2, q3, #90
+ +[0-9a-f]+: fcf658a7 vcmla.f32 d21, d22, d23, #90
+ +[0-9a-f]+: fe565883 vcmla.f16 d21, d22, d3\[0\], #90
+ +[0-9a-f]+: fe5658a3 vcmla.f16 d21, d22, d3\[1\], #90
+ +[0-9a-f]+: fe142843 vcmla.f16 q1, q2, d3\[0\], #90
+ +[0-9a-f]+: fe142863 vcmla.f16 q1, q2, d3\[1\], #90
+ +[0-9a-f]+: fed658a7 vcmla.f32 d21, d22, d23\[0\], #90
+ +[0-9a-f]+: fe942867 vcmla.f32 q1, q2, d23\[0\], #90
+
+[0-9a-f]+ <.*>:
+ +[0-9a-f]+: fc94 2846 vcadd.f32 q1, q2, q3, #90
+ +[0-9a-f]+: fd94 2846 vcadd.f32 q1, q2, q3, #270
+ +[0-9a-f]+: fcc6 58a7 vcadd.f16 d21, d22, d23, #90
+ +[0-9a-f]+: fc84 2846 vcadd.f16 q1, q2, q3, #90
+ +[0-9a-f]+: fcd6 58a7 vcadd.f32 d21, d22, d23, #90
+ +[0-9a-f]+: fc34 2846 vcmla.f32 q1, q2, q3, #0
+ +[0-9a-f]+: fcb4 2846 vcmla.f32 q1, q2, q3, #90
+ +[0-9a-f]+: fd34 2846 vcmla.f32 q1, q2, q3, #180
+ +[0-9a-f]+: fdb4 2846 vcmla.f32 q1, q2, q3, #270
+ +[0-9a-f]+: fce6 58a7 vcmla.f16 d21, d22, d23, #90
+ +[0-9a-f]+: fca4 2846 vcmla.f16 q1, q2, q3, #90
+ +[0-9a-f]+: fcf6 58a7 vcmla.f32 d21, d22, d23, #90
+ +[0-9a-f]+: fe56 5883 vcmla.f16 d21, d22, d3\[0\], #90
+ +[0-9a-f]+: fe56 58a3 vcmla.f16 d21, d22, d3\[1\], #90
+ +[0-9a-f]+: fe14 2843 vcmla.f16 q1, q2, d3\[0\], #90
+ +[0-9a-f]+: fe14 2863 vcmla.f16 q1, q2, d3\[1\], #90
+ +[0-9a-f]+: fed6 58a7 vcmla.f32 d21, d22, d23\[0\], #90
+ +[0-9a-f]+: fe94 2867 vcmla.f32 q1, q2, d23\[0\], #90
--- /dev/null
+ .text
+
+A1:
+ .arm
+
+ vcadd.f32 q1,q2,q3,#90
+ vcadd.f32 q1,q2,q3,#270
+ vcadd.f16 d21,d22,d23,#90
+ vcadd.f16 q1,q2,q3,#90
+ vcadd.f32 d21,d22,d23,#90
+
+ vcmla.f32 q1,q2,q3,#0
+ vcmla.f32 q1,q2,q3,#90
+ vcmla.f32 q1,q2,q3,#180
+ vcmla.f32 q1,q2,q3,#270
+ vcmla.f16 d21,d22,d23,#90
+ vcmla.f16 q1,q2,q3,#90
+ vcmla.f32 d21,d22,d23,#90
+
+ vcmla.f16 d21,d22,d3[0],#90
+ vcmla.f16 d21,d22,d3[1],#90
+ vcmla.f16 q1,q2,d3[0],#90
+ vcmla.f16 q1,q2,d3[1],#90
+ vcmla.f32 d21,d22,d23[0],#90
+ vcmla.f32 q1,q2,d23[0],#90
+
+T1:
+ .thumb
+
+ vcadd.f32 q1,q2,q3,#90
+ vcadd.f32 q1,q2,q3,#270
+ vcadd.f16 d21,d22,d23,#90
+ vcadd.f16 q1,q2,q3,#90
+ vcadd.f32 d21,d22,d23,#90
+
+ vcmla.f32 q1,q2,q3,#0
+ vcmla.f32 q1,q2,q3,#90
+ vcmla.f32 q1,q2,q3,#180
+ vcmla.f32 q1,q2,q3,#270
+ vcmla.f16 d21,d22,d23,#90
+ vcmla.f16 q1,q2,q3,#90
+ vcmla.f32 d21,d22,d23,#90
+
+ vcmla.f16 d21,d22,d3[0],#90
+ vcmla.f16 d21,d22,d3[1],#90
+ vcmla.f16 q1,q2,d3[0],#90
+ vcmla.f16 q1,q2,d3[1],#90
+ vcmla.f32 d21,d22,d23[0],#90
+ vcmla.f32 q1,q2,d23[0],#90
2016-12-05 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * arm-dis.c (coprocessor_opcodes): Add vcmla and vcadd.
+ (print_insn_coprocessor): Add 'V' format for neon D or Q regs.
+
+2016-12-05 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* arm-dis.c (coprocessor_opcodes): Add vjcvt.
2016-12-01 Nick Clifton <nickc@redhat.com>
%<bitfield>G print as an iWMMXt general purpose or control register
%<bitfield>D print as a NEON D register
%<bitfield>Q print as a NEON Q register
+ %<bitfield>V print as a NEON D or Q register
%<bitfield>E print a quarter-float immediate value
%y<code> print a single precision VFP reg.
0xfc400000, 0xfff00000,
"mcrr2%c\t%8-11d, %4-7d, %12-15R, %16-19R, cr%0-3d"},
+  /* ARMv8.3 AdvSIMD instructions in the space of coprocessor 8.
+     The rotation is printed digit by digit from one bit of the encoding:
+     a conditional '9' followed by "0" yields #0 or #90, and a choice of
+     '2' or '1' followed by a choice of '7' or '8' and then "0" yields #270
+     or #180.  Both choices must test the same rotation bit.  In the
+     by-element forms that bit is bit 20; bit 23 there selects f16 vs f32
+     and must not be used for the rotation.  */
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfc800800, 0xfeb00f10, "vcadd%c.f16\t%12-15,22V, %16-19,7V, %0-3,5V, #%24?29%24'70"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfc900800, 0xfeb00f10, "vcadd%c.f32\t%12-15,22V, %16-19,7V, %0-3,5V, #%24?29%24'70"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfc200800, 0xff300f10, "vcmla%c.f16\t%12-15,22V, %16-19,7V, %0-3,5V, #%23'90"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfd200800, 0xff300f10, "vcmla%c.f16\t%12-15,22V, %16-19,7V, %0-3,5V, #%23?21%23?780"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfc300800, 0xff300f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5V, #%23'90"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfd300800, 0xff300f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5V, #%23?21%23?780"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfe000800, 0xfea00f10, "vcmla%c.f16\t%12-15,22V, %16-19,7V, %0-3D[%5?10], #%20'90"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfe200800, 0xfea00f10, "vcmla%c.f16\t%12-15,22V, %16-19,7V, %0-3D[%5?10], #%20?21%20?780"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfe800800, 0xfea00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20'90"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
+    0xfea00800, 0xfea00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20?21%20?780"},
+
/* V5 coprocessor instructions. */
{ARM_FEATURE_CORE_LOW (ARM_EXT_V5),
0xfc100000, 0xfe100000, "ldc2%22'l%c\t%8-11d, cr%12-15d, %A"},
}
func (stream, "%s", arm_regnames[value]);
break;
+ case 'V':
+ if (given & (1 << 6))
+ goto Q;
+ /* FALLTHROUGH */
case 'D':
func (stream, "d%ld", value);
break;
case 'Q':
+ Q:
if (value & 1)
func (stream, "<illegal reg q%ld.5>", value >> 1);
else