1 (* Common code for ARM NEON header file, documentation and test case
4 Copyright (C) 2006-2013 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
23 (* Shorthand types for vector elements. *)
(* [Conv (dst, src)] / [Cast (dst, src)] pair a destination element type with
   a source element type for conversion/reinterpret ops; [NoElts] marks that
   no single element type applies.  Constructor order is significant (it
   fixes polymorphic-compare ordering) -- do not reorder. *)
24 type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
25 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
26 | Cast of elts * elts | NoElts
(* Coarse classification of an element type (see [elt_class] below).
   [ConvClass (dst, src)] classifies both halves of a Conv/Cast pair;
   [NoType] corresponds to [NoElts]. *)
28 type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
29 | ConvClass of eltclass * eltclass | NoType
31 (* These vector types correspond directly to C types. *)
(* NOTE(review): original lines 44-48 are missing from this dump -- they
   presumably declare the scalar constructors used later in this file
   (e.g. [T_void] used by store_1/store_3) -- confirm against upstream.
   [T_immediate (lo, hi)] looks like an immediate operand bounded to
   [lo, hi] (see the const_shift builders below) -- confirm. *)
32 type vectype = T_int8x8 | T_int8x16
33 | T_int16x4 | T_int16x8
34 | T_int32x2 | T_int32x4
35 | T_int64x1 | T_int64x2
36 | T_uint8x8 | T_uint8x16
37 | T_uint16x4 | T_uint16x8
38 | T_uint32x2 | T_uint32x4
39 | T_uint64x1 | T_uint64x2
40 | T_float32x2 | T_float32x4
41 | T_poly8x8 | T_poly8x16
42 | T_poly16x4 | T_poly16x8
43 | T_immediate of int * int
49 | T_float32 | T_arrayof of int * vectype
50 | T_ptrto of vectype | T_const of vectype
(* NOTE(review): the closing "*)" of the comment below (original lines
   61-62) is missing from this dump, as is the blank line before the type. *)
55 (* The meanings of the following are:
56 TImode : "Tetra", two registers (four words).
57 EImode : "hExa", three registers (six words).
58 OImode : "Octa", four registers (eight words).
59 CImode : "dodeCa", six registers (twelve words).
60 XImode : "heXadeca", eight registers (sixteen words).
63 type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* One operand slot of an instruction shape: D-register, Q-register, core
   (general-purpose) register, immediate, array of registers, or a
   (possibly const) pointer to one of those.  The trailing constructors
   exist only for the test generator, as the comments say. *)
65 type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
66 | PtrTo of shape_elt | CstPtrTo of shape_elt
67 (* These next ones are used only in the test generator. *)
68 | Element_of_dreg (* Used for "lane" variants. *)
69 | Element_of_qreg (* Likewise. *)
70 | All_elements_of_dreg (* Used for "dup" variants. *)
71 | Alternatives of shape_elt list (* Used for multiple valid operands *)
(* Overall shape of an instruction: how many operands and what register
   class each lives in.
   NOTE(review): several constructor lines are missing from this dump
   (original lines 74, 76, 78-80, 85-86, 88) -- the nullary constructors
   Long, Wide, Wide_lane, Wide_scalar, Long_imm, Narrow and Narrow_imm are
   matched on by regmap/shapemap/mode_of_elt below, so they must be
   declared in the gaps; confirm against upstream. *)
73 type shape_form = All of int * shape_elt
75 | Long_noreg of shape_elt
77 | Wide_noreg of shape_elt
81 | Binary_imm of shape_elt
82 | Use_operands of shape_elt array
83 | By_scalar of shape_elt
84 | Unary_scalar of shape_elt
87 | Pair_result of shape_elt
(* ArityN: the first vectype is the return type, followed by the N argument
   types (see e.g. [cmp_sign_matters] which builds Arity2 (rtype, ...)). *)
89 type arity = Arity0 of vectype
90 | Arity1 of vectype * vectype
91 | Arity2 of vectype * vectype * vectype
92 | Arity3 of vectype * vectype * vectype * vectype
93 | Arity4 of vectype * vectype * vectype * vectype * vectype
(* GCC machine modes for vectors.
   NOTE(review): this dump jumps from original line 96 to 141, so the tail
   of this declaration is missing -- the scalar modes QI, HI, SI and SF are
   used by [mode_of_elt] below and must be declared in the gap; confirm. *)
95 type vecmode = V8QI | V4HI | V2SI | V2SF | DI
96 | V16QI | V8HI | V4SI | V4SF | V2DI
(* NOTE(review): this span is the surviving remains of the [opcode] variant
   type (original lines ~97-208); the "type opcode =" header and almost all
   constructors (Vadd, Vmul, Vshl, ... used in the ops table below) are
   missing from this dump -- only section comments and [Vmov_n] survive.
   Restore from upstream before editing. *)
141 (* Ops with scalar. *)
168 (* Vector extract. *)
170 (* Reverse elements. *)
174 (* Transposition ops. *)
178 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
184 (* Set/extract lanes from a vector. *)
187 (* Initialize vector from bit pattern. *)
189 (* Set all lanes to same value. *)
191 | Vmov_n (* Is this the same? *)
192 (* Duplicate scalar to all lanes of vector. *)
194 (* Combine vectors. *)
196 (* Get quadword high/low parts. *)
199 (* Convert vectors. *)
202 (* Narrow/lengthen vectors. *)
208 (* Reinterpret casts. *)
(* Build the lane permutation for element reversal within [revsize]-bit
   chunks of [elsize]-bit elements: XOR-ing each index with the in-chunk
   mask swaps lanes pairwise.
   NOTE(review): the final expression (original line 215, presumably
   converting [arr] to a list) is missing from this dump. *)
211 let rev_elems revsize elsize nelts _ =
212 let mask = (revsize / elsize) - 1 in
213 let arr = Array.init nelts
214 (fun i -> i lxor mask) in
(* Build an interleave index list: pairs (i, i + stride), advancing i by
   [increment] each step.  NOTE(review): the base-case arm (original line
   219, presumably "0 -> []") and the initial call are missing from this
   dump.  Also note the pattern [nelts] in the surviving arm is a fresh
   catch-all binding that shadows the outer [nelts] -- intentional, but
   warning-prone. *)
217 let permute_range i stride nelts increment =
218 let rec build i = function
220 | nelts -> i :: (i + stride) :: build (i + increment) (pred nelts) in
(* Lane-index series for the permute-style shuffles.  Each is
   [permute_range] specialised on its per-pair step: 1 for vzip, 4 for
   vuzp, 2 for vtrn. *)
let zip_range first stride count = permute_range first stride count 1

let uzip_range first stride count = permute_range first stride count 4

let trn_range first stride count = permute_range first stride count 2
(* Shuffle-mask builders for __builtin_shuffle (see [Use_shuffle] below):
   (element size, number of elements, high/low selector) -> index list.
   NOTE(review): the "match part with" header line of each function
   (original lines 233, 238, 243) is missing from this dump. *)
232 let zip_elems _ nelts part =
234 `lo -> zip_range 0 nelts (nelts / 2)
235 | `hi -> zip_range (nelts / 2) nelts (nelts / 2)
237 let uzip_elems _ nelts part =
239 `lo -> uzip_range 0 2 (nelts / 2)
240 | `hi -> uzip_range 1 2 (nelts / 2)
242 let trn_elems _ nelts part =
244 `lo -> trn_range 0 nelts (nelts / 2)
245 | `hi -> trn_range 1 nelts (nelts / 2)
247 (* Features used for documentation, to distinguish between some instruction
248 variants, and to signal special requirements (e.g. swapping arguments). *)
(* NOTE(review): the "type features =" header (original ~line 249) and
   several constructors are missing from this dump -- No_op, Halving,
   Saturating, Rounding, Doubling, High_half and Dst_unsign are all used in
   the ops table below, so they must be declared in the gaps; confirm
   against upstream. *)
257 | Flipped of string (* Builtin name to use with flipped arguments. *)
258 | InfoWord (* Pass an extra word for signage/rounding etc. (always passed
259 for All _, Long, Wide, Narrow shape_forms. *)
260 (* Implement builtin as shuffle. The parameter is a function which returns
261 masks suitable for __builtin_shuffle: arguments are (element size,
262 number of elements, high/low part selector). *)
263 | Use_shuffle of (int -> int -> [`lo|`hi] -> int list)
264 (* A specification as to the shape of instruction expected upon
265 disassembly, used if it differs from the shape used to build the
266 intrinsic prototype. Multiple entries in the constructor's argument
267 indicate that the intrinsic expands to more than one assembly
268 instruction, each with a corresponding shape specified here. *)
269 | Disassembles_as of shape_form list
270 | Builtin_name of string (* Override the name of the builtin. *)
271 (* Override the name of the instruction. If more than one name
272 is specified, it means that the instruction can have any of those
274 | Instruction_name of string list
275 (* Mark that the intrinsic yields no instructions, or expands to yield
276 behavior that the test generator cannot test. *)
278 (* Mark that the intrinsic has constant arguments that cannot be set
279 to the defaults (zero for pointers and one otherwise) in the test
280 cases. The function supplied must return the integer to be written
281 into the testcase for the argument number (0-based) supplied to it. *)
282 | Const_valuator of (int -> int)
285 (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
286 | Requires_feature of string
287 | Requires_arch of int
(* Raised when a single element type is needed but a [Cast] pair of two
   (possibly different-width) element types is found -- see [elt_width]. *)
289 exception MixedMode of elts * elts
(* Width of an element type in bits.  For a Conv pair the two widths must
   agree; a Cast pair raises [MixedMode]; [NoElts] is a failure.
   NOTE(review): the "| Conv (a, b) ->" arm header (original line 296) is
   missing from this dump -- the [wa]/[wb] lines below belong to it. *)
291 let rec elt_width = function
292 S8 | U8 | P8 | I8 | B8 -> 8
293 | S16 | U16 | P16 | I16 | B16 -> 16
294 | S32 | F32 | U32 | I32 | B32 -> 32
295 | S64 | U64 | I64 | B64 -> 64
297 let wa = elt_width a and wb = elt_width b in
298 if wa = wb then wa else failwith "element width?"
299 | Cast (a, b) -> raise (MixedMode (a, b))
300 | NoElts -> failwith "No elts"
(* Classify an element type into its [eltclass].
   NOTE(review): the arms for F32, P8/P16 (original lines 305-306,
   presumably -> Float and -> Poly) and for NoElts (line 310, presumably
   -> NoType) are missing from this dump; confirm against upstream. *)
302 let rec elt_class = function
303 S8 | S16 | S32 | S64 -> Signed
304 | U8 | U16 | U32 | U64 -> Unsigned
307 | I8 | I16 | I32 | I64 -> Int
308 | B8 | B16 | B32 | B64 -> Bits
309 | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
(* Inverse of elt_class/elt_width: rebuild an element type from a class and
   a width in bits.
   @raise Failure on an unrepresentable (class, width) pair.
   NOTE(review): most of the dispatch table (original lines 313-319 and
   323-332 -- the Signed, Float, Poly, Int and Bits rows, plus the opening
   "match c, w with") is missing from this dump. *)
312 let elt_of_class_width c w =
320 | Unsigned, 16 -> U16
321 | Unsigned, 32 -> U32
322 | Unsigned, 64 -> U64
333 | _ -> failwith "Bad element type"
(* Map an element type to the unsigned integer element of the same width. *)
let unsigned_of_elt e = elt_of_class_width Unsigned (elt_width e)

(* Map an element type to the signed integer element of the same width. *)
let signed_of_elt e = elt_of_class_width Signed (elt_width e)

(* Map an element type to the untyped-bits element of the same width. *)
let bits_of_elt e = elt_of_class_width Bits (elt_width e)
(* NOTE(review): only the header survives in this dump -- the body
   (original lines 347-355) is missing.  From its uses below
   (make_sign_invariant, cmp_sign_invar) it presumably collapses each
   signed/unsigned element to a sign-free variant; confirm upstream. *)
346 let non_signed_variant = function
(* Rebuild [v] with polynomial classes replaced (table lookups take an
   unsigned index vector for poly ops -- see [table] below).
   NOTE(review): the match arms (original lines 359-360, presumably
   "Poly -> Unsigned | x -> x in") are missing from this dump. *)
357 let poly_unsigned_variant v =
358 let elclass = match elt_class v with
361 elt_of_class_width elclass (elt_width v)
(* NOTE(review): two headerless function bodies -- the "let widen_elt elt ="
   (original line 363) and "let narrow_elt elt =" (line 368) lines are
   missing from this dump.  Both names are referenced by [shapemap] below;
   the first doubles the element width, the second halves it. *)
364 let w = elt_width elt
365 and c = elt_class elt in
366 elt_of_class_width c (w * 2)
369 let w = elt_width elt
370 and c = elt_class elt in
371 elt_of_class_width c (w / 2)
373 (* If we're trying to find a mode from a "Use_operands" instruction, use the
374 last vector operand as the dominant mode used to invoke the correct builtin.
375 We must stick to this rule in neon.md. *)
(* NOTE(review): the "let rec scan opno =" header (original line 377) and
   the remaining match arms (379-380, 383-384 -- presumably Dreg/Qreg
   direct hits and the recurse-on-(opno - 1) fallthrough) are missing from
   this dump.  [scan] walks backwards from the last operand. *)
376 let find_key_operand operands =
378 match operands.(opno) with
381 | VecArray (_, Qreg) -> Qreg
382 | VecArray (_, Dreg) -> Dreg
385 scan ((Array.length operands) - 1)
(* Pick the GCC machine mode for (element type, instruction shape): the
   element width selects a column index (8/16/32/64 -> 0..3) and the shape
   selects a mode row (D-reg, Q-reg, core-reg, widening, narrowing).
   NOTE(review): the "let idx =" binding header (original line 390), the
   "match shape with" line (394) and the "| Long_imm ->" continuation of
   the Long/Wide arm (404) are missing from this dump. *)
387 let rec mode_of_elt elt shape =
388 let flt = match elt_class elt with
389 Float | ConvClass(_, Float) -> true | _ -> false in
391 match elt_width elt with
392 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
393 | _ -> failwith "Bad element width"
395 All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
396 | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
397 [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
398 | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
399 | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
400 [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
401 | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
402 [| QI; HI; if flt then SF else SI; DI |].(idx)
403 | Long | Wide | Wide_lane | Wide_scalar
405 [| V8QI; V4HI; V2SI; DI |].(idx)
406 | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
407 | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
408 | _ -> failwith "invalid shape"
410 (* Modify an element type dependent on the shape of the instruction and the
(* Per-operand element-type adjuster: returns the function (widen, narrow
   or identity) to apply to operand number [no] for the given shape.
   NOTE(review): the tail of the comment above (original line ~411-412)
   and the "match shape with" header (line 415) are missing from this
   dump. *)
413 let shapemap shape no =
414 let ident = fun x -> x in
416 All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
417 | Binary_imm _ -> ident
418 | Long | Long_noreg _ | Wide_scalar | Long_imm ->
419 [| widen_elt; ident; ident |].(no)
420 | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
421 | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
422 | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
424 (* Register type (D/Q) of an operand, based on shape and operand number. *)
(* Index 0 is the result operand; subsequent indices are arguments.
   NOTE(review): the "match shape with" header (original line 427) is
   missing from this dump. *)
426 let regmap shape no =
428 All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
429 | Long -> [| Qreg; Dreg; Dreg |].(no)
430 | Wide -> [| Qreg; Qreg; Dreg |].(no)
431 | Narrow -> [| Dreg; Qreg; Qreg |].(no)
432 | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
433 | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
434 | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
435 | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
436 | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
437 | Binary_imm reg -> [| reg; reg; Immed |].(no)
438 | Long_imm -> [| Qreg; Dreg; Immed |].(no)
439 | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
440 | Use_operands these -> these.(no)
(* Map (shape, element type, operand number) to the C-level [vectype] for
   that operand: adjust the element via [shapemap], find the register class
   via [regmap], then dispatch on register class.
   NOTE(review): the bulk of [type_for_reg_elt] is missing from this dump
   (original lines 446-459, 461-474, 476-489, 491-493, 496, 498, 501-502)
   -- the Dreg, Qreg and Corereg dispatch tables and the PtrTo/CstPtrTo
   pattern headers.  Only the failure arms and array/pointer wrappers
   survive below. *)
442 let type_for_elt shape elt no =
443 let elt = (shapemap shape no) elt in
444 let reg = regmap shape no in
445 let rec type_for_reg_elt reg elt =
460 | _ -> failwith "Bad elt type"
475 | _ -> failwith "Bad elt type"
490 | _ -> failwith "Bad elt type"
494 | VecArray (num, sub) ->
495 T_arrayof (num, type_for_reg_elt sub elt)
497 T_ptrto (type_for_reg_elt x elt)
499 T_ptrto (T_const (type_for_reg_elt x elt))
500 (* Anything else is solely for the use of the test generator. *)
503 type_for_reg_elt reg elt
(* Size of a vector type in bits: 64 for the D-register types, 128 for the
   Q-register types.
   @raise Not_found for any non-vector [vectype]. *)
let vectype_size vt =
  match vt with
  | T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 ->
      64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 ->
      128
  | _ -> raise Not_found
(* Pick the opaque [inttype] backing an array of [num] vectors of type
   [elttype], keyed on total size in 32-bit words.
   NOTE(review): the match on [numwords] (original lines 518-523 --
   presumably 4/6/8/12/16 -> B_TImode..B_XImode per the mode comment
   earlier in the file) is missing from this dump. *)
515 let inttype_for_array num elttype =
516 let eltsize = vectype_size elttype in
517 let numwords = (num * eltsize) / 32 in
524 | _ -> failwith ("no int type for size " ^ string_of_int numwords)
(* These functions return pairs of (internal, external) types, where
   "internal" types are those seen by GCC, and "external" are those seen by
   the assembler.  These types aren't necessarily the same, since the
   intrinsics can munge more than one C type into each assembler opcode. *)

(* Wrap a type-generating function so that the element type it reports is
   collapsed to its sign-free form via [non_signed_variant]. *)
let make_sign_invariant func shape elt =
  let arity, produced = func shape elt in
  (arity, non_signed_variant produced)
(* Don't restrict any types: build the arity from the per-operand vector
   types for [elt] and report the element type unchanged. *)
let elts_same make_arity shape elt =
  let operand_type = type_for_elt shape elt in
  (make_arity operand_type, elt)
541 (* As sign_invar_*, but when sign matters. *)
(* Each combinator fixes how operand slots map to arity positions; slot 0
   appearing twice (dst, dst, ...) marks an in/out accumulator operand.
   NOTE(review): the "let elts_same_io =", "let elts_same_2 =" and
   "let elts_same_1 =" header lines (original 544-545, 552-553, 555-556)
   are missing from this dump -- the headerless bodies below belong to
   them; confirm against upstream. *)
542 let elts_same_io_lane =
543 elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))
546 elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))
548 let elts_same_2_lane =
549 elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))
551 let elts_same_3 = elts_same_2_lane
554 elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))
557 elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
559 (* Use for signed/unsigned invariant operations (i.e. where the operation
560 doesn't depend on the sign of the data). *)
562 let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
563 let sign_invar_io = make_sign_invariant elts_same_io
564 let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
565 let sign_invar_2 = make_sign_invariant elts_same_2
566 let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: the result vector uses the unsigned element
   of the same width (an all-ones/all-zeros mask); the element type is
   reported unchanged. *)
let cmp_sign_matters shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (result, operand 1, operand 2), elt)
575 (* Signed/unsigned invariant comparison. *)
(* NOTE(review): the tail of this function (original lines 579, 581-584 --
   the remainder of the match on [non_signed_variant elt'] producing the
   final element type) is missing from this dump. *)
577 let cmp_sign_invar shape elt =
578 let shape', elt' = cmp_sign_matters shape elt in
580 match non_signed_variant elt' with
(* Comparison (VTST) where only the element width matters: result is the
   unsigned element of the same width, and the reported element type is
   collapsed to its untyped-bits variant. *)
let cmp_bits shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  let reported = bits_of_elt elt in
  (Arity2 (result, operand 1, operand 2), reported)
(* Register shift: operand 2 (the shift-amount vector) is always the signed
   element of the data's width, whatever the sign of the data. *)
let reg_shift shape elt =
  let data = type_for_elt shape elt in
  let shift_arg = type_for_elt shape (signed_of_elt elt) 2 in
  (Arity2 (data 0, data 1, shift_arg), elt)
599 (* Genericised constant-shift type-generating function. *)
(* [mkimm] builds the immediate operand type from the (shape-mapped) width
   of operand 2; [?arity] overrides the default 2-argument arity; [?result]
   overrides the result element type.
   NOTE(review): the binding of [r_elt] (original lines 606-608, presumably
   "and r_elt = match result with None -> elt") and the "match arity with"
   header (line 611) are missing from this dump. *)
601 let const_shift mkimm ?arity ?result shape elt =
602 let op2type = (shapemap shape 2) elt in
603 let op2width = elt_width op2type in
604 let op2 = mkimm op2width
605 and op1 = type_for_elt shape elt 1
609 | Some restriction -> restriction elt in
610 let rtype = type_for_elt shape r_elt 0 in
612 None -> Arity2 (rtype, op1, op2), elt
613 | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts.  T_immediate (1, width) presumably
   bounds the immediate to [1, element width] -- confirm against the
   T_immediate declaration. *)
let shift_right shape elt =
  const_shift (fun width -> T_immediate (1, width)) shape elt
(* Immediate right-shift and accumulate: the destination doubles as an
   input operand (dst, dst, op1, op2). *)
let shift_right_acc shape elt =
  const_shift
    (fun width -> T_immediate (1, width))
    ~arity:(fun dst acc_in shift_imm -> Arity3 (dst, dst, acc_in, shift_imm))
    shape elt
624 (* Use for immediate right-shifts when the operation doesn't care about
(* NOTE(review): the rest of the comment above (original lines 625-626,
   presumably "signedness.") is missing from this dump. *)
627 let shift_right_sign_invar =
628 make_sign_invariant shift_right
630 (* Immediate right-shift; result is unsigned even when operand is signed. *)
(* NOTE(review): the final application line (original 634, presumably
   "shape elt") is missing from this dump. *)
632 let shift_right_to_uns shape elt =
633 const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
(* Immediate left-shift: the immediate ranges from 0 to one less than the
   element width. *)
let shift_left shape elt =
  const_shift (fun width -> T_immediate (0, width - 1)) shape elt
641 (* Immediate left-shift, unsigned result. *)
(* NOTE(review): the final application line of shift_left_to_uns (original
   645, presumably "shape elt") is missing from this dump. *)
643 let shift_left_to_uns shape elt =
644 const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
647 (* Immediate left-shift, don't care about signs. *)
649 let shift_left_sign_invar =
650 make_sign_invariant shift_left
652 (* Shift left/right and insert: only element size matters. *)
(* NOTE(review): the binding header (original line 655, presumably
   "let arity, _ =") is missing from this dump -- the const_shift call
   below is its right-hand side. *)
654 let shift_insert shape elt =
656 const_shift (fun imm -> T_immediate (1, imm))
657 ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
658 arity, bits_of_elt elt
(* Extract a single lane from a vector.  Poly elements are reported as the
   unsigned element of the same width; all 32-bit elements collapse to the
   untyped-bits element. *)
let get_lane shape elt =
  let t = type_for_elt shape elt in
  let reported =
    match elt with
    | P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other
  in
  (Arity2 (t 0, t 1, t 2), reported)
(* Set a single lane in a vector; the reported element type is the
   untyped-bits variant of [elt]. *)
let set_lane shape elt =
  let t = type_for_elt shape elt in
  (Arity3 (t 0, t 1, t 2, t 3), bits_of_elt elt)

(* As [set_lane], but report no element type at all. *)
let set_lane_notype shape elt =
  let t = type_for_elt shape elt in
  (Arity3 (t 0, t 1, t 2, t 3), NoElts)
(* Initialize a vector from a bit pattern: operand 1 is always a U64
   value, result is a vector of [elt]. *)
let create_vector shape elt =
  let source = type_for_elt shape U64 1 in
  let result = type_for_elt shape elt 0 in
  (Arity1 (result, source), elt)
(* Conversions: the element must be a [Conv] or [Cast] pair; operands come
   from the source element, the result type from the destination element.
   @raise Failure on a non-conversion element. *)
let conv make_arity shape elt =
  match elt with
  | Conv (dest, src) | Cast (dest, src) ->
      let operand = type_for_elt shape src in
      let result = type_for_elt shape dest 0 in
      (make_arity result operand, elt)
  | _ -> failwith "Non-conversion element in conversion"

let conv_1 = conv (fun result operand -> Arity1 (result, operand 1))
let conv_2 = conv (fun result operand -> Arity2 (result, operand 1, operand 2))
(* Operation has an unsigned result even if operands are signed: the result
   type uses the unsigned element of the same width. *)
let dst_unsign make_arity shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  (make_arity result operand, elt)

let dst_unsign_1 = dst_unsign (fun result operand -> Arity1 (result, operand 1))
(* Wrap a type-generating function so the element it reports is collapsed
   to the untyped-bits variant. *)
let make_bits_only func shape elt =
  let arity, produced = func shape elt in
  (arity, bits_of_elt produced)
(* Extend operation: three same-typed inputs, bits-only element type. *)
let extend shape elt =
  let t = type_for_elt shape elt in
  (Arity3 (t 0, t 1, t 2, t 3), bits_of_elt elt)
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial
   ops (hence [poly_unsigned_variant]); only the bit pattern of the
   element matters. *)
let table mkarity shape elt =
  let t = type_for_elt shape elt in
  let index_type = type_for_elt shape (poly_unsigned_variant elt) 2 in
  (mkarity t index_type, bits_of_elt elt)

let table_2 = table (fun t op2 -> Arity2 (t 0, t 1, op2))
let table_io = table (fun t op2 -> Arity3 (t 0, t 0, t 1, op2))
721 (* Operations where only bits matter. *)
(* Bits-only variants of the elts_same_N combinators (element type is
   collapsed to its untyped-bits variant). *)
723 let bits_1 = make_bits_only elts_same_1
724 let bits_2 = make_bits_only elts_same_2
725 let bits_3 = make_bits_only elts_same_3
(* Store operations: result is void and only the bit pattern of the
   element matters. *)
let store_1 shape elt =
  let t = type_for_elt shape elt in
  (Arity2 (T_void, t 0, t 1), bits_of_elt elt)

(* As [store_1] but with three operands (structure stores). *)
let store_3 shape elt =
  let t = type_for_elt shape elt in
  (Arity3 (T_void, t 0, t 1, t 2), bits_of_elt elt)
(* Wrap a type-generating function, discarding its reported element type.
   NOTE(review): the final line of make_notype (original 738, presumably
   "arity, NoElts") is missing from this dump. *)
736 let make_notype func shape elt =
737 let arity, _ = func shape elt in
740 let notype_1 = make_notype elts_same_1
741 let notype_2 = make_notype elts_same_2
742 let notype_3 = make_notype elts_same_3
(* Bit-select operations (VBSL-style): the first input operand is an
   unsigned-integer vector of the same width; no element type is
   reported. *)
let bit_select shape elt =
  let data = type_for_elt shape elt in
  let mask = type_for_elt shape (unsigned_of_elt elt) in
  (Arity3 (data 0, mask 1, data 2, data 3), NoElts)
(* Common lists of supported element types.  The order presumably fixes
   the order variants are emitted in -- keep it unchanged. *)
let s_8_32 = [S8; S16; S32]
let u_8_32 = [U8; U16; U32]
let su_8_32 = s_8_32 @ u_8_32
let su_8_64 = [S64; U64] @ su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_16 = [P8; P16; S8; S16; U8; U16]
let pf_su_8_32 = [P8; P16; F32] @ su_8_32
let pf_su_8_64 = [P8; P16; F32] @ su_8_64
let suf_32 = [S32; U32; F32]
766 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
767 Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
768 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
769 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
770 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
771 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
772 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
773 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
774 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
775 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
776 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
777 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
778 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
779 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
780 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
781 Narrow, "vRaddhn", sign_invar_2, su_16_64;
783 (* Multiplication. *)
784 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
785 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
786 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
787 elts_same_2, [S16; S32];
788 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
789 elts_same_2, [S16; S32];
791 [Saturating; Rounding; Doubling; High_half;
792 Instruction_name ["vqrdmulh"]],
793 All (3, Dreg), "vqRdmulh",
794 elts_same_2, [S16; S32];
796 [Saturating; Rounding; Doubling; High_half;
797 Instruction_name ["vqrdmulh"]],
798 All (3, Qreg), "vqRdmulhQ",
799 elts_same_2, [S16; S32];
800 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
801 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
803 (* Multiply-accumulate. *)
804 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
805 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
806 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
807 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
809 (* Multiply-subtract. *)
810 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
811 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
812 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
813 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
815 (* Fused-multiply-accumulate. *)
816 Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32];
817 Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32];
818 Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32];
819 Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32];
821 (* Round to integral. *)
822 Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
823 "vrndn", elts_same_1, [F32];
824 Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
825 "vrndqn", elts_same_1, [F32];
826 Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
827 "vrnda", elts_same_1, [F32];
828 Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
829 "vrndqa", elts_same_1, [F32];
830 Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
831 "vrndp", elts_same_1, [F32];
832 Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
833 "vrndqp", elts_same_1, [F32];
834 Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
835 "vrndm", elts_same_1, [F32];
836 Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
837 "vrndqm", elts_same_1, [F32];
838 Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
839 "vrnd", elts_same_1, [F32];
840 Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
841 "vrndq", elts_same_1, [F32];
843 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
844 Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
845 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
846 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
847 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
848 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
849 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
850 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
851 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
852 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
853 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
854 Narrow, "vRsubhn", sign_invar_2, su_16_64;
856 (* Comparison, equal. *)
857 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
858 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
860 (* Comparison, greater-than or equal. *)
861 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
862 Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
863 All (3, Dreg), "vcge", cmp_sign_matters,
865 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
866 Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
867 All (3, Qreg), "vcgeQ", cmp_sign_matters,
870 (* Comparison, less-than or equal. *)
871 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
873 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"],
874 All (3, Dreg), "vcle", cmp_sign_matters,
876 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
877 All (3, Qreg), "vcleQ", cmp_sign_matters,
879 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
880 All (3, Qreg), "vcleQ", cmp_sign_matters,
883 (* Comparison, greater-than. *)
884 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
885 Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
886 All (3, Dreg), "vcgt", cmp_sign_matters,
888 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
889 Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
890 All (3, Qreg), "vcgtQ", cmp_sign_matters,
893 (* Comparison, less-than. *)
894 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
896 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"],
897 All (3, Dreg), "vclt", cmp_sign_matters,
899 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
900 All (3, Qreg), "vcltQ", cmp_sign_matters,
902 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
903 All (3, Qreg), "vcltQ", cmp_sign_matters,
906 (* Compare absolute greater-than or equal. *)
907 Vcage, [Instruction_name ["vacge"]],
908 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
909 Vcage, [Instruction_name ["vacge"]],
910 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
912 (* Compare absolute less-than or equal. *)
913 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
914 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
915 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
916 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
918 (* Compare absolute greater-than or equal. *)
919 Vcagt, [Instruction_name ["vacgt"]],
920 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
921 Vcagt, [Instruction_name ["vacgt"]],
922 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
924 (* Compare absolute less-than or equal. *)
925 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
926 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
927 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
928 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
931 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
932 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
934 (* Absolute difference. *)
935 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
936 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
937 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
939 (* Absolute difference and accumulate. *)
940 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
941 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
942 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
945 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
946 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
949 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
950 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
953 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
954 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
955 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
957 (* Pairwise add, widen and accumulate. *)
958 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
959 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
961 (* Folding maximum, minimum. *)
962 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
963 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
965 (* Reciprocal step. *)
966 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
967 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
968 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
969 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
971 (* Vector shift left. *)
972 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
973 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
974 Vshl, [Instruction_name ["vrshl"]; Rounding],
975 All (3, Dreg), "vRshl", reg_shift, su_8_64;
976 Vshl, [Instruction_name ["vrshl"]; Rounding],
977 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
978 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
979 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
980 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
981 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
982 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
983 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
985 (* Vector shift right by constant. *)
986 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
987 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
988 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
989 "vRshr_n", shift_right, su_8_64;
990 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
991 "vRshrQ_n", shift_right, su_8_64;
992 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
993 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
994 shift_right_sign_invar, su_16_64;
995 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
996 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
997 "vqRshrn_n", shift_right, su_16_64;
998 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
999 shift_right_to_uns, [S16; S32; S64];
1000 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
1001 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
1003 (* Vector shift left by constant. *)
1004 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
1005 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
1006 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
1007 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
1008 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
1009 shift_left_to_uns, [S8; S16; S32; S64];
1010 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
1011 shift_left_to_uns, [S8; S16; S32; S64];
1012 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
1014 (* Vector shift right by constant and accumulate. *)
1015 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
1016 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
1017 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
1018 "vRsra_n", shift_right_acc, su_8_64;
1019 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
1020 "vRsraQ_n", shift_right_acc, su_8_64;
1022 (* Vector shift right and insert. *)
1023 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
1024 P8 :: P16 :: su_8_64;
1025 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
1026 P8 :: P16 :: su_8_64;
1028 (* Vector shift left and insert. *)
1029 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
1030 P8 :: P16 :: su_8_64;
1031 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
1032 P8 :: P16 :: su_8_64;
1034 (* Absolute value. *)
1035 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
1036 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
1037 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
1038 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
1041 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
1042 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
1043 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
1044 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
1047 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
1048 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
1050 (* Count leading sign bits. *)
1051 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
1052 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
1054 (* Count leading zeros. *)
1055 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
1056 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
1058 (* Count number of set bits. *)
1059 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
1060 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
1062 (* Reciprocal estimate. *)
1063 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
1064 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
1066 (* Reciprocal square-root estimate. *)
1067 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
1068 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
1070 (* Get lanes from a vector. *)
1072 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1073 Instruction_name ["vmov"]],
1074 Use_operands [| Corereg; Dreg; Immed |],
1075 "vget_lane", get_lane, pf_su_8_32;
1079 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1080 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1081 Use_operands [| Corereg; Dreg; Immed |],
1082 "vget_lane", notype_2, [S64; U64];
1084 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1085 Instruction_name ["vmov"]],
1086 Use_operands [| Corereg; Qreg; Immed |],
1087 "vgetQ_lane", get_lane, pf_su_8_32;
1090 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1091 Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0);
1093 Use_operands [| Corereg; Qreg; Immed |],
1094 "vgetQ_lane", notype_2, [S64; U64];
1096 (* Set lanes in a vector. *)
1097 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1098 Instruction_name ["vmov"]],
1099 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1100 set_lane, pf_su_8_32;
1102 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1103 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1104 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1105 set_lane_notype, [S64; U64];
1106 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1107 Instruction_name ["vmov"]],
1108 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1109 set_lane, pf_su_8_32;
1110 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1111 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1112 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1113 set_lane_notype, [S64; U64];
1115 (* Create vector from literal bit pattern. *)
1117 [No_op], (* Not really, but it can yield various things that are too
1118 hard for the test generator at this time. *)
1119 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1122 (* Set all lanes to the same value. *)
1124 [Disassembles_as [Use_operands [| Dreg;
1125 Alternatives [ Corereg;
1126 Element_of_dreg ] |]]],
1127 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1131 Instruction_name ["vmov"];
1132 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1133 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1136 [Disassembles_as [Use_operands [| Qreg;
1137 Alternatives [ Corereg;
1138 Element_of_dreg ] |]]],
1139 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1143 Instruction_name ["vmov"];
1144 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1145 Use_operands [| Dreg; Corereg; Corereg |]]],
1146 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1149 (* These are just aliases for the above. *)
1151 [Builtin_name "vdup_n";
1152 Disassembles_as [Use_operands [| Dreg;
1153 Alternatives [ Corereg;
1154 Element_of_dreg ] |]]],
1155 Use_operands [| Dreg; Corereg |],
1156 "vmov_n", bits_1, pf_su_8_32;
1159 Builtin_name "vdup_n";
1160 Instruction_name ["vmov"];
1161 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1162 Use_operands [| Dreg; Corereg |],
1163 "vmov_n", notype_1, [S64; U64];
1165 [Builtin_name "vdupQ_n";
1166 Disassembles_as [Use_operands [| Qreg;
1167 Alternatives [ Corereg;
1168 Element_of_dreg ] |]]],
1169 Use_operands [| Qreg; Corereg |],
1170 "vmovQ_n", bits_1, pf_su_8_32;
1173 Builtin_name "vdupQ_n";
1174 Instruction_name ["vmov"];
1175 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1176 Use_operands [| Dreg; Corereg; Corereg |]]],
1177 Use_operands [| Qreg; Corereg |],
1178 "vmovQ_n", notype_1, [S64; U64];
1180 (* Duplicate, lane version. We can't use Use_operands here because the
1181 rightmost register (always Dreg) would be picked up by find_key_operand,
1182 when we want the leftmost register to be used in this case (otherwise
1183 the modes are indistinguishable in neon.md, etc. *)
1185 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1186 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1188 [No_op; Const_valuator (fun _ -> 0)],
1189 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1191 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1192 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1194 [No_op; Const_valuator (fun _ -> 0)],
1195 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1197 (* Combining vectors. *)
1199 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1202 (* Splitting vectors. *)
1204 Use_operands [| Dreg; Qreg |], "vget_high",
1205 notype_1, pf_su_8_64;
1206 Vget_low, [Instruction_name ["vmov"];
1207 Disassembles_as [Use_operands [| Dreg; Dreg |]];
1209 Use_operands [| Dreg; Qreg |], "vget_low",
1210 notype_1, pf_su_8_32;
1212 Use_operands [| Dreg; Qreg |], "vget_low",
1213 notype_1, [S64; U64];
1216 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1217 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1218 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1219 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1220 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1221 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1222 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1223 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1225 (* Move, narrowing. *)
1226 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1227 Narrow, "vmovn", sign_invar_1, su_16_64;
1228 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1229 Narrow, "vqmovn", elts_same_1, su_16_64;
1231 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1232 Narrow, "vqmovun", dst_unsign_1,
1236 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1237 Long, "vmovl", elts_same_1, su_8_32;
1241 [Instruction_name ["vtbl"];
1242 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1243 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1244 Vtbl 2, [Instruction_name ["vtbl"]],
1245 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1247 Vtbl 3, [Instruction_name ["vtbl"]],
1248 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1250 Vtbl 4, [Instruction_name ["vtbl"]],
1251 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1254 (* Extended table lookup. *)
1256 [Instruction_name ["vtbx"];
1257 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1258 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1259 Vtbx 2, [Instruction_name ["vtbx"]],
1260 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1262 Vtbx 3, [Instruction_name ["vtbx"]],
1263 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1265 Vtbx 4, [Instruction_name ["vtbx"]],
1266 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1269 (* Multiply, lane. (note: these were undocumented at the time of
1271 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1272 [S16; S32; U16; U32; F32];
1273 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1274 [S16; S32; U16; U32; F32];
1276 (* Multiply-accumulate, lane. *)
1277 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1278 [S16; S32; U16; U32; F32];
1279 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1280 [S16; S32; U16; U32; F32];
1281 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1282 [S16; S32; U16; U32];
1283 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1284 elts_same_io_lane, [S16; S32];
1286 (* Multiply-subtract, lane. *)
1287 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1288 [S16; S32; U16; U32; F32];
1289 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1290 [S16; S32; U16; U32; F32];
1291 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1292 [S16; S32; U16; U32];
1293 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1294 elts_same_io_lane, [S16; S32];
1296 (* Long multiply, lane. *)
1298 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1300 (* Saturating doubling long multiply, lane. *)
1301 Vqdmull_lane, [Saturating; Doubling],
1302 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1304 (* Saturating doubling long multiply high, lane. *)
1305 Vqdmulh_lane, [Saturating; Halving],
1306 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1307 Vqdmulh_lane, [Saturating; Halving],
1308 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1309 Vqdmulh_lane, [Saturating; Halving; Rounding;
1310 Instruction_name ["vqrdmulh"]],
1311 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1312 Vqdmulh_lane, [Saturating; Halving; Rounding;
1313 Instruction_name ["vqrdmulh"]],
1314 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1316 (* Vector multiply by scalar. *)
1318 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1319 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1320 sign_invar_2, [S16; S32; U16; U32; F32];
1322 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1323 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1324 sign_invar_2, [S16; S32; U16; U32; F32];
1326 (* Vector long multiply by scalar. *)
1327 Vmull_n, [Instruction_name ["vmull"];
1328 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1329 Wide_scalar, "vmull_n",
1330 elts_same_2, [S16; S32; U16; U32];
1332 (* Vector saturating doubling long multiply by scalar. *)
1333 Vqdmull_n, [Saturating; Doubling;
1334 Disassembles_as [Use_operands [| Qreg; Dreg;
1335 Element_of_dreg |]]],
1336 Wide_scalar, "vqdmull_n",
1337 elts_same_2, [S16; S32];
1339 (* Vector saturating doubling long multiply high by scalar. *)
1341 [Saturating; Halving; InfoWord;
1342 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1343 Use_operands [| Qreg; Qreg; Corereg |],
1344 "vqdmulhQ_n", elts_same_2, [S16; S32];
1346 [Saturating; Halving; InfoWord;
1347 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1348 Use_operands [| Dreg; Dreg; Corereg |],
1349 "vqdmulh_n", elts_same_2, [S16; S32];
1351 [Saturating; Halving; Rounding; InfoWord;
1352 Instruction_name ["vqrdmulh"];
1353 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1354 Use_operands [| Qreg; Qreg; Corereg |],
1355 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1357 [Saturating; Halving; Rounding; InfoWord;
1358 Instruction_name ["vqrdmulh"];
1359 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1360 Use_operands [| Dreg; Dreg; Corereg |],
1361 "vqRdmulh_n", elts_same_2, [S16; S32];
1363 (* Vector multiply-accumulate by scalar. *)
1365 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1366 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1367 sign_invar_io, [S16; S32; U16; U32; F32];
1369 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1370 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1371 sign_invar_io, [S16; S32; U16; U32; F32];
1372 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1373 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1376 (* Vector multiply subtract by scalar. *)
1378 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1379 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1380 sign_invar_io, [S16; S32; U16; U32; F32];
1382 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1383 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1384 sign_invar_io, [S16; S32; U16; U32; F32];
1385 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1386 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1389 (* Vector extract. *)
1390 Vext, [Const_valuator (fun _ -> 0)],
1391 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1393 Vext, [Const_valuator (fun _ -> 0)],
1394 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1397 (* Reverse elements. *)
1398 Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1,
1399 P8 :: P16 :: F32 :: su_8_32;
1400 Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1,
1401 P8 :: P16 :: F32 :: su_8_32;
1402 Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1,
1403 [P8; P16; S8; U8; S16; U16];
1404 Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1,
1405 [P8; P16; S8; U8; S16; U16];
1406 Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1,
1408 Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1,
1411 (* Bit selection. *)
1413 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1414 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1415 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1418 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1419 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1420 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1423 Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2, pf_su_8_16;
1424 Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vtrn", bits_2, suf_32;
1425 Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1427 Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2, pf_su_8_16;
1428 Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vzip", bits_2, suf_32;
1429 Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1431 (* Unzip elements. *)
1432 Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2,
1434 Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2,
1437 (* Element/structure loads. VLD1 variants. *)
1439 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1440 CstPtrTo Corereg |]]],
1441 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1443 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1444 CstPtrTo Corereg |]]],
1445 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1449 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1450 CstPtrTo Corereg |]]],
1451 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1452 "vld1_lane", bits_3, pf_su_8_32;
1454 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1455 CstPtrTo Corereg |]];
1456 Const_valuator (fun _ -> 0)],
1457 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1458 "vld1_lane", bits_3, [S64; U64];
1460 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1461 CstPtrTo Corereg |]]],
1462 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1463 "vld1Q_lane", bits_3, pf_su_8_32;
1465 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1466 CstPtrTo Corereg |]]],
1467 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1468 "vld1Q_lane", bits_3, [S64; U64];
1471 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1472 CstPtrTo Corereg |]]],
1473 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1476 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1477 CstPtrTo Corereg |]]],
1478 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1481 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1482 CstPtrTo Corereg |]]],
1483 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1485 (* Treated identically to vld1_dup above as we now
1486 do a single load followed by a duplicate. *)
1488 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1489 CstPtrTo Corereg |]]],
1490 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1493 (* VST1 variants. *)
1494 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1496 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1497 store_1, pf_su_8_64;
1498 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1500 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1501 store_1, pf_su_8_64;
1504 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1505 CstPtrTo Corereg |]]],
1506 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1507 "vst1_lane", store_3, pf_su_8_32;
1509 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1510 CstPtrTo Corereg |]];
1511 Const_valuator (fun _ -> 0)],
1512 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1513 "vst1_lane", store_3, [U64; S64];
1515 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1516 CstPtrTo Corereg |]]],
1517 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1518 "vst1Q_lane", store_3, pf_su_8_32;
1520 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1521 CstPtrTo Corereg |]]],
1522 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1523 "vst1Q_lane", store_3, [U64; S64];
1525 (* VLD2 variants. *)
1526 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1527 "vld2", bits_1, pf_su_8_32;
1528 Vldx 2, [Instruction_name ["vld1"]],
1529 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1530 "vld2", bits_1, [S64; U64];
1531 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1532 CstPtrTo Corereg |];
1533 Use_operands [| VecArray (2, Dreg);
1534 CstPtrTo Corereg |]]],
1535 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1536 "vld2Q", bits_1, pf_su_8_32;
1539 [Disassembles_as [Use_operands
1540 [| VecArray (2, Element_of_dreg);
1541 CstPtrTo Corereg |]]],
1542 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1543 VecArray (2, Dreg); Immed |],
1544 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1546 [Disassembles_as [Use_operands
1547 [| VecArray (2, Element_of_dreg);
1548 CstPtrTo Corereg |]]],
1549 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1550 VecArray (2, Qreg); Immed |],
1551 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1554 [Disassembles_as [Use_operands
1555 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1556 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1557 "vld2_dup", bits_1, pf_su_8_32;
1559 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1560 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1561 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1562 "vld2_dup", bits_1, [S64; U64];
1564 (* VST2 variants. *)
1565 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1567 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1568 store_1, pf_su_8_32;
1569 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1571 Instruction_name ["vst1"]],
1572 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1573 store_1, [S64; U64];
1574 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1576 Use_operands [| VecArray (2, Dreg);
1578 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1579 store_1, pf_su_8_32;
1582 [Disassembles_as [Use_operands
1583 [| VecArray (2, Element_of_dreg);
1584 CstPtrTo Corereg |]]],
1585 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1586 store_3, P8 :: P16 :: F32 :: su_8_32;
1588 [Disassembles_as [Use_operands
1589 [| VecArray (2, Element_of_dreg);
1590 CstPtrTo Corereg |]]],
1591 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1592 store_3, [P16; F32; U16; U32; S16; S32];
1594 (* VLD3 variants. *)
1595 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1596 "vld3", bits_1, pf_su_8_32;
1597 Vldx 3, [Instruction_name ["vld1"]],
1598 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1599 "vld3", bits_1, [S64; U64];
1600 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1601 CstPtrTo Corereg |];
1602 Use_operands [| VecArray (3, Dreg);
1603 CstPtrTo Corereg |]]],
1604 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1605 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1608 [Disassembles_as [Use_operands
1609 [| VecArray (3, Element_of_dreg);
1610 CstPtrTo Corereg |]]],
1611 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1612 VecArray (3, Dreg); Immed |],
1613 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1615 [Disassembles_as [Use_operands
1616 [| VecArray (3, Element_of_dreg);
1617 CstPtrTo Corereg |]]],
1618 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1619 VecArray (3, Qreg); Immed |],
1620 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1623 [Disassembles_as [Use_operands
1624 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1625 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1626 "vld3_dup", bits_1, pf_su_8_32;
1628 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1629 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1630 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1631 "vld3_dup", bits_1, [S64; U64];
1633 (* VST3 variants. *)
1634 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1636 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1637 store_1, pf_su_8_32;
1638 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1640 Instruction_name ["vst1"]],
1641 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1642 store_1, [S64; U64];
1643 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1645 Use_operands [| VecArray (3, Dreg);
1647 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1648 store_1, pf_su_8_32;
1651 [Disassembles_as [Use_operands
1652 [| VecArray (3, Element_of_dreg);
1653 CstPtrTo Corereg |]]],
1654 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1655 store_3, P8 :: P16 :: F32 :: su_8_32;
1657 [Disassembles_as [Use_operands
1658 [| VecArray (3, Element_of_dreg);
1659 CstPtrTo Corereg |]]],
1660 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1661 store_3, [P16; F32; U16; U32; S16; S32];
1663 (* VLD4/VST4 variants. *)
1664 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1665 "vld4", bits_1, pf_su_8_32;
1666 Vldx 4, [Instruction_name ["vld1"]],
1667 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1668 "vld4", bits_1, [S64; U64];
1669 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1670 CstPtrTo Corereg |];
1671 Use_operands [| VecArray (4, Dreg);
1672 CstPtrTo Corereg |]]],
1673 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1674 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1677 [Disassembles_as [Use_operands
1678 [| VecArray (4, Element_of_dreg);
1679 CstPtrTo Corereg |]]],
1680 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1681 VecArray (4, Dreg); Immed |],
1682 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1684 [Disassembles_as [Use_operands
1685 [| VecArray (4, Element_of_dreg);
1686 CstPtrTo Corereg |]]],
1687 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1688 VecArray (4, Qreg); Immed |],
1689 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1692 [Disassembles_as [Use_operands
1693 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1694 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1695 "vld4_dup", bits_1, pf_su_8_32;
1697 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1698 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1699 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1700 "vld4_dup", bits_1, [S64; U64];
1702 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1704 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1705 store_1, pf_su_8_32;
1706 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1708 Instruction_name ["vst1"]],
1709 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1710 store_1, [S64; U64];
1711 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1713 Use_operands [| VecArray (4, Dreg);
1715 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1716 store_1, pf_su_8_32;
1719 [Disassembles_as [Use_operands
1720 [| VecArray (4, Element_of_dreg);
1721 CstPtrTo Corereg |]]],
1722 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1723 store_3, P8 :: P16 :: F32 :: su_8_32;
1725 [Disassembles_as [Use_operands
1726 [| VecArray (4, Element_of_dreg);
1727 CstPtrTo Corereg |]]],
1728 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1729 store_3, [P16; F32; U16; U32; S16; S32];
1731 (* Logical operations. And. *)
1732 Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
1733 Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
1734 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1737 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
1738 Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
1739 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1742 Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
1743 Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
1744 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1746 (* Bic (And-not). *)
1747 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
1748 Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
1749 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1752 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
1753 Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
1754 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
1758 let elems = P8 :: P16 :: F32 :: su_8_64 in
1761 let types = List.fold_right
1762 (fun convfrom acc ->
1763 if convfrom <> convto then
1764 Cast (convto, convfrom) :: acc
1770 let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
1771 "vreinterpret", conv_1, types
1772 and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
1773 "vreinterpretQ", conv_1, types in
1774 dconv :: qconv :: acc)
1778 (* Output routines. *)
(* Map an element type to the textual suffix used in intrinsic names
   (e.g. S8 -> "s8").  Conv/Cast pairs join their halves with '_';
   NoElts has no textual form and is a caller error.  *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8"  | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8"  | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8"  | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8"   | B16 -> "16"  | B32 -> "32"  | B64 -> "64"
  | P8 -> "p8"  | P16 -> "p16" | F32 -> "f32"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
(* Like string_of_elt, but join the two halves of a Conv/Cast element with
   '.' instead of '_'; other elements are rendered as by string_of_elt.
   Restores the `match elt with` line that had been dropped, without which
   the function does not parse.  *)
let string_of_elt_dots elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt
(* Render a vectype as the C type name used in arm_neon.h.  The inner NAME
   worker threads an AFFIX function that appends a suffix to named vector
   types ("_t" at top level, a "x<num>" multiplier inside T_arrayof);
   builtin scalar modes and immediates are returned verbatim.
   Restores the T_void, T_ptrto and T_const arms and the closing `in`,
   which had been dropped (T_ptrto/T_const are declared in the vectype
   definition above; without these arms the bodies below them dangled).  *)
let string_of_vectype vt =
  let rec name affix = function
      T_int8x8 -> affix "int8x8"
    | T_int8x16 -> affix "int8x16"
    | T_int16x4 -> affix "int16x4"
    | T_int16x8 -> affix "int16x8"
    | T_int32x2 -> affix "int32x2"
    | T_int32x4 -> affix "int32x4"
    | T_int64x1 -> affix "int64x1"
    | T_int64x2 -> affix "int64x2"
    | T_uint8x8 -> affix "uint8x8"
    | T_uint8x16 -> affix "uint8x16"
    | T_uint16x4 -> affix "uint16x4"
    | T_uint16x8 -> affix "uint16x8"
    | T_uint32x2 -> affix "uint32x2"
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
    | T_poly8x16 -> affix "poly8x16"
    | T_poly16x4 -> affix "poly16x4"
    | T_poly16x8 -> affix "poly16x8"
    | T_int8 -> affix "int8"
    | T_int16 -> affix "int16"
    | T_int32 -> affix "int32"
    | T_int64 -> affix "int64"
    | T_uint8 -> affix "uint8"
    | T_uint16 -> affix "uint16"
    | T_uint32 -> affix "uint32"
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        (* Base type is named without the "_t" suffix, which is re-added
           (via AFFIX) after the "x<num>" multiplier.  *)
        let basename = name (fun x -> x) base in
        affix (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = name affix x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = name affix x in
        Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
(* Builtin C type name for each opaque multi-register integer mode.  *)
let string_of_inttype itype =
  match itype with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Lowercase textual name of a machine mode, as used to build builtin
   function names.
   NOTE(review): only the modes matched below are handled; if the mode type
   declared earlier in this file has further constructors (e.g. a scalar
   float mode), this match is non-exhaustive — confirm against the mode
   type definition.  *)
let string_of_mode = function
    V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
  | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
  | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead). *)
(* Lowercase a whole name to obtain the public intrinsic spelling, e.g.
   "vRsra_n" -> "vrsra_n".  String.lowercase is the pre-4.03 stdlib
   function (deprecated in later OCaml); the names handled here are ASCII,
   so this amounts to plain ASCII lowercasing.  *)
let intrinsic_name name = String.lowercase name
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list.  The last Flipped/Builtin_name feature wins
   (List.fold_right visits features right-to-left, so the leftmost override
   is applied last).  The result keeps only the characters unchanged by
   String.lowercase — i.e. lowercase letters, digits and '_'; uppercase
   letters mark parts of the intrinsic name passed as an extra argument
   rather than encoded in the builtin name (see intrinsic_name above).
   Restores the fold's lambda header, the fall-through arm and the final
   `Buffer.contents buf`, which had been dropped.  *)
let builtin_name features name =
  let name = List.fold_right
               (fun el name ->
                  match el with
                    Flipped x | Builtin_name x -> x
                  | _ -> name) features name in
  let islower x = let str = String.make 1 x in (String.lowercase str) = str
  and buf = Buffer.create (String.length name) in
  String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
  Buffer.contents buf
(* Transform an arity into a list of strings: the constituent vector types
   in declaration order, rendered as C type names.
   Restores the `match a with` line that had been dropped, without which
   the function does not parse.  *)
let strings_of_arity a =
  match a with
  | Arity0 vt -> [string_of_vectype vt]
  | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2]
  | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1;
                               string_of_vectype vt2;
                               string_of_vectype vt3]
  | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1;
                                    string_of_vectype vt2;
                                    string_of_vectype vt3;
                                    string_of_vectype vt4]
  | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1;
                                         string_of_vectype vt2;
                                         string_of_vectype vt3;
                                         string_of_vectype vt4;
                                         string_of_vectype vt5]
(* Builtin-name suffixes that are dropped, when immediately preceded by an
   underscore, to recover the underlying instruction name.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used.  If both such entries are present,
   the first in the list will be chosen.
   Restores the dropped framing lines (the `try`/`begin` around the feature
   search, the Flipped predicate arm, the fall-through `assert false`, and
   the `[]`/cons arms of the suffix stripper).  *)
let get_insn_names features name =
  (* Prefer an explicit Instruction_name/Flipped feature; otherwise fall
     back on the (suffix-stripped) builtin name.  *)
  let names = try
    begin
      match List.find (fun feature -> match feature with
                                        Instruction_name _ -> true
                                      | Flipped _ -> true
                                      | _ -> false) features
      with
        Instruction_name names -> names
      | Flipped name -> [name]
      | _ -> assert false
    end
    with Not_found -> [builtin_name features name]
  in
    (* Drop a trailing "_<suffix>" for each known suffix; names without an
       underscore (String.rindex raises Not_found) pass through unchanged.  *)
    List.map (fun name' ->
      try
        let underscore = String.rindex name' '_' in
        let our_suffix = String.sub name' (underscore + 1)
                                   ((String.length name') - underscore - 1)
        in
        let rec strip remaining_suffixes =
          match remaining_suffixes with
            [] -> name'
          | s::ss when our_suffix = s -> String.sub name' 0 underscore
          | _::ss -> strip ss
        in
          strip suffixes_to_strip
      with (Not_found | Invalid_argument _) -> name') names
(* Apply a function to each element of a list and then comma-separate
   the resulting strings, appending them to the accumulator ACC.
   The original hand-rolled recursion had lost its `match` header and base
   cases; rebuilt with String.concat, which is equivalent (an empty list
   yields ACC unchanged) and avoids repeated O(n^2) string appends.  *)
let commas f elts acc =
  acc ^ String.concat ", " (List.map f elts)
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.
   Restores the dropped `try`, predicate close and `assert false` arm.  *)
let analyze_all_shapes features shape f =
  try
    match List.find (fun feature ->
                       match feature with Disassembles_as _ -> true
                                        | _ -> false) features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]