From 8e79c3df515af53e2f21aed2d8ad7226af33f447 Mon Sep 17 00:00:00 2001 From: Catherine Moore Date: Tue, 18 Nov 2008 15:45:05 +0000 Subject: [PATCH] Add support for ARM half-precision conversion instructions. --- bfd/ChangeLog | 7 ++ bfd/elf32-arm.c | 95 ++++++++++++++-- binutils/ChangeLog | 7 ++ binutils/readelf.c | 8 +- gas/ChangeLog | 11 ++ gas/config/tc-arm.c | 173 +++++++++++++++++++++--------- gas/testsuite/ChangeLog | 9 ++ gas/testsuite/gas/arm/half-prec-neon.d | 9 ++ gas/testsuite/gas/arm/half-prec-neon.s | 4 + gas/testsuite/gas/arm/half-prec-psyntax.d | 13 +++ gas/testsuite/gas/arm/half-prec-psyntax.s | 7 ++ gas/testsuite/gas/arm/half-prec-vfpv3.d | 71 ++++++++++++ gas/testsuite/gas/arm/half-prec-vfpv3.s | 68 ++++++++++++ include/elf/ChangeLog | 4 + include/elf/arm.h | 6 ++ include/opcode/ChangeLog | 5 + include/opcode/arm.h | 3 + opcodes/ChangeLog | 8 ++ opcodes/arm-dis.c | 7 ++ 19 files changed, 458 insertions(+), 57 deletions(-) create mode 100644 gas/testsuite/gas/arm/half-prec-neon.d create mode 100644 gas/testsuite/gas/arm/half-prec-neon.s create mode 100644 gas/testsuite/gas/arm/half-prec-psyntax.d create mode 100644 gas/testsuite/gas/arm/half-prec-psyntax.s create mode 100644 gas/testsuite/gas/arm/half-prec-vfpv3.d create mode 100644 gas/testsuite/gas/arm/half-prec-vfpv3.s diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 50febf2..bc22555 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,10 @@ +2008-11-18 Catherine Moore + + * elf32-arm.c (elf32_arm_merge_eabi_attributes): Merge + half-precision attributes. + (elf32_arm_copy_one_eabi_other_attribute): New. + (elf32_arm_copy_other_attribute_list): New. + 2008-11-18 Nick Clifton * dwarf2.c (read_section): Fix formatting. diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c index da1ac24..9008a4b 100644 --- a/bfd/elf32-arm.c +++ b/bfd/elf32-arm.c @@ -8163,6 +8163,33 @@ elf32_arm_obj_attrs_arg_type (int tag) return (tag & 1) != 0 ? 
2 : 1; } +static void +elf32_arm_copy_one_eabi_other_attribute (bfd *ibfd, bfd *obfd, obj_attribute_list *in_list) +{ + switch (in_list->tag) + { + case Tag_VFP_HP_extension: + case Tag_ABI_FP_16bit_format: + bfd_elf_add_obj_attr_int (obfd, OBJ_ATTR_PROC, in_list->tag, in_list->attr.i); + break; + + default: + if ((in_list->tag & 127) < 64) + { + _bfd_error_handler + (_("Warning: %B: Unknown EABI object attribute %d"), ibfd, in_list->tag); + break; + } + } +} + +static void +elf32_arm_copy_eabi_other_attribute_list (bfd *ibfd, bfd *obfd, obj_attribute_list *in_list) +{ + for (; in_list; in_list = in_list->next ) + elf32_arm_copy_one_eabi_other_attribute (ibfd, obfd, in_list); +} + /* Merge EABI object attributes from IBFD into OBFD. Raise an error if there are conflicting attributes. */ @@ -8172,6 +8199,7 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) obj_attribute *in_attr; obj_attribute *out_attr; obj_attribute_list *in_list; + obj_attribute_list *out_list; /* Some tags have 0 = don't care, 1 = strong requirement, 2 = weak requirement. */ static const int order_312[3] = {3, 1, 2}; @@ -8196,7 +8224,7 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) /* This needs to happen before Tag_ABI_FP_number_model is merged. */ if (in_attr[Tag_ABI_VFP_args].i != out_attr[Tag_ABI_VFP_args].i) { - /* Ignore mismatches if teh object doesn't use floating point. */ + /* Ignore mismatches if the object doesn't use floating point. */ if (out_attr[Tag_ABI_FP_number_model].i == 0) out_attr[Tag_ABI_VFP_args].i = in_attr[Tag_ABI_VFP_args].i; else if (in_attr[Tag_ABI_FP_number_model].i != 0) @@ -8362,6 +8390,7 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) return FALSE; } break; + default: /* All known attributes should be explicitly covered. 
*/ abort (); } @@ -8392,15 +8421,67 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) while (in_list && in_list->tag == Tag_compatibility) in_list = in_list->next; - for (; in_list; in_list = in_list->next) + out_list = elf_other_obj_attributes_proc (obfd); + while (out_list && out_list->tag == Tag_compatibility) + out_list = out_list->next; + + for (; in_list != NULL; ) { - if ((in_list->tag & 128) < 64) + if (out_list == NULL) { - _bfd_error_handler - (_("Warning: %B: Unknown EABI object attribute %d"), - ibfd, in_list->tag); - break; + elf32_arm_copy_eabi_other_attribute_list (ibfd, obfd, in_list); + return TRUE; + } + + /* The tags for each list are in numerical order. */ + /* If the tags are equal, then merge. */ + if (in_list->tag == out_list->tag) + { + switch (in_list->tag) + { + case Tag_VFP_HP_extension: + if (out_list->attr.i == 0) + out_list->attr.i = in_list->attr.i; + break; + + case Tag_ABI_FP_16bit_format: + if (in_list->attr.i != 0 && out_list->attr.i != 0) + { + if (in_list->attr.i != out_list->attr.i) + { + _bfd_error_handler + (_("ERROR: fp16 format mismatch between %B and %B"), + ibfd, obfd); + return FALSE; + } + } + if (in_list->attr.i != 0) + out_list->attr.i = in_list->attr.i; + break; + + default: + if ((in_list->tag & 127) < 64) + { + _bfd_error_handler + (_("Warning: %B: Unknown EABI object attribute %d"), ibfd, in_list->tag); + break; + } + } + } + else if (in_list->tag < out_list->tag) + { + /* This attribute is in ibfd, but not obfd. Copy to obfd and advance to + next input attribute. 
*/ + elf32_arm_copy_one_eabi_other_attribute (ibfd, obfd, in_list); + } + if (in_list->tag <= out_list->tag) + { + in_list = in_list->next; + if (in_list == NULL) + continue; } + while (out_list && out_list->tag < in_list->tag) + out_list = out_list->next; } return TRUE; } diff --git a/binutils/ChangeLog b/binutils/ChangeLog index 0cff03a..be9455c 100644 --- a/binutils/ChangeLog +++ b/binutils/ChangeLog @@ -1,3 +1,10 @@ +2008-11-18 Catherine Moore + + * readelf.c (arm_attr_tag_ABI_FP_16bit_format): New. + (arm_attr_tag_VFP_HP_extension): New. + (arm_attr_public_tag arm_attr_public_tags): Support + new attributes. + 2008-11-17 Nick Clifton * version.c (print_version): Update copyright year. diff --git a/binutils/readelf.c b/binutils/readelf.c index ecc54bf..02de84c 100644 --- a/binutils/readelf.c +++ b/binutils/readelf.c @@ -8784,6 +8784,10 @@ static const char *arm_attr_tag_ABI_optimization_goals[] = static const char *arm_attr_tag_ABI_FP_optimization_goals[] = {"None", "Prefer Speed", "Aggressive Speed", "Prefer Size", "Aggressive Size", "Prefer Accuracy", "Aggressive Accuracy"}; +static const char *arm_attr_tag_VFP_HP_extension[] = + {"Not Allowed", "Allowed"}; +static const char *arm_attr_tag_ABI_FP_16bit_format[] = + {"None", "IEEE 754", "Alternative Format"}; #define LOOKUP(id, name) \ {id, #name, 0x80 | ARRAY_SIZE(arm_attr_tag_##name), arm_attr_tag_##name} @@ -8817,7 +8821,9 @@ static arm_attr_public_tag arm_attr_public_tags[] = LOOKUP(29, ABI_WMMX_args), LOOKUP(30, ABI_optimization_goals), LOOKUP(31, ABI_FP_optimization_goals), - {32, "compatibility", 0, NULL} + {32, "compatibility", 0, NULL}, + LOOKUP(36, VFP_HP_extension), + LOOKUP(38, ABI_FP_16bit_format), }; #undef LOOKUP diff --git a/gas/ChangeLog b/gas/ChangeLog index 05b98ff..d65e6c3 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,14 @@ +2008-11-18 Catherine Moore + + * config/tc-arm.c (neon_type_mask): Renumber. + (type_chk_of_el_type): Handle F_F16. 
+ (neon_cvt_flavour): Recognize half-precision conversions. + (do_neon_cvt): New shapes NS_QD and + NS_DQ. Encode half-precision conversions. + (do_neon_cvtt): Encode the T bit. + (asm_opcode_insns): vcvt, vcvtt support. + (arm_option_cpu_value): Add neon-fp16 support. + 2008-11-17 Nick Clifton * as.c (parse_args): Update copyright year. diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 325aade..37f8112 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -226,6 +226,7 @@ static const arm_feature_set fpu_vfp_ext_d32 = static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1); static const arm_feature_set fpu_vfp_v3_or_neon_ext = ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3); +static const arm_feature_set fpu_neon_fp16 = ARM_FEATURE (0, FPU_NEON_FP16); static int mfloat_abi_opt = -1; /* Record user cpu selection for object attributes. */ @@ -10682,36 +10683,37 @@ static struct neon_shape_info neon_shape_tab[] = enum neon_type_mask { - N_S8 = 0x000001, - N_S16 = 0x000002, - N_S32 = 0x000004, - N_S64 = 0x000008, - N_U8 = 0x000010, - N_U16 = 0x000020, - N_U32 = 0x000040, - N_U64 = 0x000080, - N_I8 = 0x000100, - N_I16 = 0x000200, - N_I32 = 0x000400, - N_I64 = 0x000800, - N_8 = 0x001000, - N_16 = 0x002000, - N_32 = 0x004000, - N_64 = 0x008000, - N_P8 = 0x010000, - N_P16 = 0x020000, - N_F32 = 0x040000, - N_F64 = 0x080000, - N_KEY = 0x100000, /* key element (main type specifier). */ - N_EQK = 0x200000, /* given operand has the same type & size as the key. */ - N_VFP = 0x400000, /* VFP mode: operand size must match register width. */ - N_DBL = 0x000001, /* if N_EQK, this operand is twice the size. */ - N_HLF = 0x000002, /* if N_EQK, this operand is half the size. */ - N_SGN = 0x000004, /* if N_EQK, this operand is forced to be signed. */ - N_UNS = 0x000008, /* if N_EQK, this operand is forced to be unsigned. */ - N_INT = 0x000010, /* if N_EQK, this operand is forced to be integer. 
*/ - N_FLT = 0x000020, /* if N_EQK, this operand is forced to be float. */ - N_SIZ = 0x000040, /* if N_EQK, this operand is forced to be size-only. */ + N_S8 = 0x0000001, + N_S16 = 0x0000002, + N_S32 = 0x0000004, + N_S64 = 0x0000008, + N_U8 = 0x0000010, + N_U16 = 0x0000020, + N_U32 = 0x0000040, + N_U64 = 0x0000080, + N_I8 = 0x0000100, + N_I16 = 0x0000200, + N_I32 = 0x0000400, + N_I64 = 0x0000800, + N_8 = 0x0001000, + N_16 = 0x0002000, + N_32 = 0x0004000, + N_64 = 0x0008000, + N_P8 = 0x0010000, + N_P16 = 0x0020000, + N_F16 = 0x0040000, + N_F32 = 0x0080000, + N_F64 = 0x0100000, + N_KEY = 0x1000000, /* key element (main type specifier). */ + N_EQK = 0x2000000, /* given operand has the same type & size as the key. */ + N_VFP = 0x4000000, /* VFP mode: operand size must match register width. */ + N_DBL = 0x0000001, /* if N_EQK, this operand is twice the size. */ + N_HLF = 0x0000002, /* if N_EQK, this operand is half the size. */ + N_SGN = 0x0000004, /* if N_EQK, this operand is forced to be signed. */ + N_UNS = 0x0000008, /* if N_EQK, this operand is forced to be unsigned. */ + N_INT = 0x0000010, /* if N_EQK, this operand is forced to be integer. */ + N_FLT = 0x0000020, /* if N_EQK, this operand is forced to be float. */ + N_SIZ = 0x0000040, /* if N_EQK, this operand is forced to be size-only. */ N_UTYP = 0, N_MAX_NONSPECIAL = N_F64 }; @@ -10905,6 +10907,7 @@ type_chk_of_el_type (enum neon_el_type type, unsigned size) case NT_float: switch (size) { + case 16: return N_F16; case 32: return N_F32; case 64: return N_F64; default: ; @@ -12598,25 +12601,28 @@ neon_cvt_flavour (enum neon_shape rs) CVT_VAR (1, N_U32, N_F32); CVT_VAR (2, N_F32, N_S32); CVT_VAR (3, N_F32, N_U32); + /* Half-precision conversions. */ + CVT_VAR (4, N_F32, N_F16); + CVT_VAR (5, N_F16, N_F32); whole_reg = N_VFP; /* VFP instructions. 
*/ - CVT_VAR (4, N_F32, N_F64); - CVT_VAR (5, N_F64, N_F32); - CVT_VAR (6, N_S32, N_F64 | key); - CVT_VAR (7, N_U32, N_F64 | key); - CVT_VAR (8, N_F64 | key, N_S32); - CVT_VAR (9, N_F64 | key, N_U32); + CVT_VAR (6, N_F32, N_F64); + CVT_VAR (7, N_F64, N_F32); + CVT_VAR (8, N_S32, N_F64 | key); + CVT_VAR (9, N_U32, N_F64 | key); + CVT_VAR (10, N_F64 | key, N_S32); + CVT_VAR (11, N_F64 | key, N_U32); /* VFP instructions with bitshift. */ - CVT_VAR (10, N_F32 | key, N_S16); - CVT_VAR (11, N_F32 | key, N_U16); - CVT_VAR (12, N_F64 | key, N_S16); - CVT_VAR (13, N_F64 | key, N_U16); - CVT_VAR (14, N_S16, N_F32 | key); - CVT_VAR (15, N_U16, N_F32 | key); - CVT_VAR (16, N_S16, N_F64 | key); - CVT_VAR (17, N_U16, N_F64 | key); + CVT_VAR (12, N_F32 | key, N_S16); + CVT_VAR (13, N_F32 | key, N_U16); + CVT_VAR (14, N_F64 | key, N_S16); + CVT_VAR (15, N_F64 | key, N_U16); + CVT_VAR (16, N_S16, N_F32 | key); + CVT_VAR (17, N_U16, N_F32 | key); + CVT_VAR (18, N_S16, N_F64 | key); + CVT_VAR (19, N_U16, N_F64 | key); return -1; #undef CVT_VAR @@ -12640,6 +12646,8 @@ do_vfp_nsyn_cvt (enum neon_shape rs, int flavour) "fultos", NULL, NULL, + NULL, + NULL, "ftosld", "ftould", "fsltod", @@ -12672,6 +12680,8 @@ do_vfp_nsyn_cvt (enum neon_shape rs, int flavour) "ftouis", "fsitos", "fuitos", + NULL, /* Flavours 4 and 5 (f32<->f16) have no opcode in this table. */ + NULL, "fcvtsd", "fcvtds", "ftosid", @@ -12701,6 +12711,8 @@ do_vfp_nsyn_cvtz (void) NULL, NULL, NULL, + NULL, + NULL, "ftosizd", "ftouizd" }; @@ -12708,16 +12720,15 @@ do_vfp_nsyn_cvtz (void) if (flavour >= 0 && flavour < (int) ARRAY_SIZE (enc) && enc[flavour]) do_vfp_nsyn_opcode (enc[flavour]); } - static void do_neon_cvt (void) { enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_FFI, NS_DD, NS_QQ, - NS_FD, NS_DF, NS_FF, NS_NULL); + NS_FD, NS_DF, NS_FF, NS_QD, NS_DQ, NS_NULL); int flavour = neon_cvt_flavour (rs); /* VFP rather than Neon conversions. 
*/ - if (flavour >= 4) + if (flavour >= 6) { do_vfp_nsyn_cvt (rs, flavour); return; @@ -12779,6 +12790,36 @@ do_neon_cvt (void) } break; + /* Half-precision conversions for Advanced SIMD -- neon. */ + case NS_QD: + case NS_DQ: + + if ((rs == NS_DQ) + && (inst.vectype.el[0].size != 16 || inst.vectype.el[1].size != 32)) + { + as_bad (_("operand size must match register width")); + break; + } + + if ((rs == NS_QD) + && ((inst.vectype.el[0].size != 32 || inst.vectype.el[1].size != 16))) + { + as_bad (_("operand size must match register width")); + break; + } + + if (rs == NS_DQ) + inst.instruction = 0x3b60600; + else + inst.instruction = 0x3b60700; + + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction = neon_dp_fixup (inst.instruction); + break; + default: /* Some VFP conversions go here (s32 <-> f32, u32 <-> f32). */ do_vfp_nsyn_cvt (rs, flavour); @@ -12786,6 +12827,34 @@ do_neon_cvt (void) } static void +do_neon_cvtb (void) +{ + inst.instruction = 0xeb20a40; + + /* The sizes are attached to the mnemonic. */ + if (inst.vectype.el[0].type != NT_invtype + && inst.vectype.el[0].size == 16) + inst.instruction |= 0x00010000; + + /* Programmer's syntax: the sizes are attached to the operands. 
*/ + else if (inst.operands[0].vectype.type != NT_invtype + && inst.operands[0].vectype.size == 16) + inst.instruction |= 0x00010000; + + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); + encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm); + do_vfp_cond_or_thumb (); +} + + +static void +do_neon_cvtt (void) +{ + do_neon_cvtb (); + inst.instruction |= 0x80; +} + +static void neon_move_immediate (void) { enum neon_shape rs = neon_select_shape (NS_DI, NS_QI, NS_NULL); @@ -15950,6 +16019,9 @@ static const struct asm_opcode insns[] = NCE(vstr, d000b00, 2, (RVSD, ADDRGLDC), neon_ldr_str), nCEF(vcvt, vcvt, 3, (RNSDQ, RNSDQ, oI32b), neon_cvt), + nCEF(vcvtb, vcvt, 2, (RVS, RVS), neon_cvtb), + nCEF(vcvtt, vcvt, 2, (RVS, RVS), neon_cvtt), + /* NOTE: All VMOV encoding is special-cased! */ NCE(vmov, 0, 1, (VMOV), neon_mov), @@ -20258,6 +20330,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] = {"arm1136jf-s", FPU_ARCH_VFP_V2}, {"maverick", FPU_ARCH_MAVERICK}, {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, + {"neon-fp16", FPU_ARCH_NEON_FP16}, {NULL, ARM_ARCH_NONE} }; @@ -20731,9 +20804,11 @@ aeabi_set_public_attributes (void) || ARM_CPU_HAS_FEATURE (arm_arch_used, arm_cext_iwmmxt)) bfd_elf_add_proc_attr_int (stdoutput, 11, 1); /* Tag_NEON_arch. */ - if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_neon_ext_v1) - || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_neon_ext_v1)) + if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_v1)) bfd_elf_add_proc_attr_int (stdoutput, 12, 1); + /* Tag_NEON_FP16_arch. */ + if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_fp16)) + bfd_elf_add_proc_attr_int (stdoutput, 36, 1); } /* Add the default contents for the .ARM.attributes section. */ diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index 04a9f1d..e6d6949 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2008-11-18 Catherine Moore + + * gas/arm/half-prec-neon.d: New. + * gas/arm/half-prec-neon.s: New. + * gas/arm/half-prec-vfpv3.d: New. 
+ * gas/arm/half-prec-vfpv3.s: New. + * gas/arm/half-prec-psyntax.d: New. + * gas/arm/half-prec-psyntax.s: New. + 2008-11-12 Hans-Peter Nilsson * gas/cris/rd-bcnst2-pic.d, gas/cris/rd-bcnst2.d, diff --git a/gas/testsuite/gas/arm/half-prec-neon.d b/gas/testsuite/gas/arm/half-prec-neon.d new file mode 100644 index 0000000..11b119e --- /dev/null +++ b/gas/testsuite/gas/arm/half-prec-neon.d @@ -0,0 +1,9 @@ +# objdump: -dr --prefix-addresses --show-raw-insn +#name: Half-precision neon instructions +#as: -mfpu=neon-fp16 + +.*: +file format .*arm.* + +.* +0+0 <[^>]*> f3b60602 vcvt\.f16\.f32 d0, q1 +0+4 <[^>]*> f3b6a706 vcvt\.f32\.f16 q5, d6 diff --git a/gas/testsuite/gas/arm/half-prec-neon.s b/gas/testsuite/gas/arm/half-prec-neon.s new file mode 100644 index 0000000..30cdb07 --- /dev/null +++ b/gas/testsuite/gas/arm/half-prec-neon.s @@ -0,0 +1,4 @@ + .text + + vcvt.f16.f32 d0, q1 + vcvt.f32.f16 q5, d6 diff --git a/gas/testsuite/gas/arm/half-prec-psyntax.d b/gas/testsuite/gas/arm/half-prec-psyntax.d new file mode 100644 index 0000000..71e4174 --- /dev/null +++ b/gas/testsuite/gas/arm/half-prec-psyntax.d @@ -0,0 +1,13 @@ +# objdump: -dr --prefix-addresses --show-raw-insn +#name: Half-precision instructions (programmer's syntax) +#as: -mfpu=neon-fp16 + +.*: +file format .*arm.* + +.* +0+00 <[^>]*> f3b60602 vcvt\.f16\.f32 d0, q1 +0+04 <[^>]*> f3b6a706 vcvt\.f32\.f16 q5, d6 +0+08 <[^>]*> eeb21ae2 vcvtt\.f32\.f16 s2, s5 +0+0c <[^>]*> eeb21a62 vcvtb\.f32\.f16 s2, s5 +0+10 <[^>]*> eeb31ae2 vcvtt\.f16\.f32 s2, s5 +0+14 <[^>]*> eeb31a62 vcvtb\.f16\.f32 s2, s5 diff --git a/gas/testsuite/gas/arm/half-prec-psyntax.s b/gas/testsuite/gas/arm/half-prec-psyntax.s new file mode 100644 index 0000000..85e5224 --- /dev/null +++ b/gas/testsuite/gas/arm/half-prec-psyntax.s @@ -0,0 +1,7 @@ + .text + vcvt d0.f16, q1.f32 + vcvt q5.f32, d6.f16 + vcvtt s2.f32, s5.f16 + vcvtb s2.f32, s5.f16 + vcvtt s2.f16, s5.f32 + vcvtb s2.f16, s5.f32 diff --git a/gas/testsuite/gas/arm/half-prec-vfpv3.d 
b/gas/testsuite/gas/arm/half-prec-vfpv3.d new file mode 100644 index 0000000..5bd9f30 --- /dev/null +++ b/gas/testsuite/gas/arm/half-prec-vfpv3.d @@ -0,0 +1,71 @@ +#objdump: -d --prefix-addresses --show-raw-insn +#name: Half-precision vfpv3 instructions +#as: -mfpu=neon-fp16 + +.*: +file format .*arm.* + +.* +0+000 <[^>]*> eeb20ae0 vcvtt.f32.f16 s0, s1 +0+004 <[^>]*> 0eb21ae1 vcvtteq.f32.f16 s2, s3 +0+008 <[^>]*> 1eb21ae1 vcvttne.f32.f16 s2, s3 +0+00c <[^>]*> 2eb21ae1 vcvttcs.f32.f16 s2, s3 +0+010 <[^>]*> 3eb21ae1 vcvttcc.f32.f16 s2, s3 +0+014 <[^>]*> 4eb21ae1 vcvttmi.f32.f16 s2, s3 +0+018 <[^>]*> 5eb21ae1 vcvttpl.f32.f16 s2, s3 +0+01c <[^>]*> 6eb21ae1 vcvttvs.f32.f16 s2, s3 +0+020 <[^>]*> 7eb21ae1 vcvttvc.f32.f16 s2, s3 +0+024 <[^>]*> 8eb21ae1 vcvtthi.f32.f16 s2, s3 +0+028 <[^>]*> 9eb21ae1 vcvttls.f32.f16 s2, s3 +0+02c <[^>]*> aeb21ae1 vcvttge.f32.f16 s2, s3 +0+030 <[^>]*> beb21ae1 vcvttlt.f32.f16 s2, s3 +0+034 <[^>]*> ceb21ae1 vcvttgt.f32.f16 s2, s3 +0+038 <[^>]*> deb21ae1 vcvttle.f32.f16 s2, s3 +0+03c <[^>]*> eeb21ae1 vcvtt.f32.f16 s2, s3 +0+040 <[^>]*> eeb30ae0 vcvtt.f16.f32 s0, s1 +0+044 <[^>]*> 0eb31ae1 vcvtteq.f16.f32 s2, s3 +0+048 <[^>]*> 1eb31ae1 vcvttne.f16.f32 s2, s3 +0+04c <[^>]*> 2eb31ae1 vcvttcs.f16.f32 s2, s3 +0+050 <[^>]*> 3eb31ae1 vcvttcc.f16.f32 s2, s3 +0+054 <[^>]*> 4eb31ae1 vcvttmi.f16.f32 s2, s3 +0+058 <[^>]*> 5eb31ae1 vcvttpl.f16.f32 s2, s3 +0+05c <[^>]*> 6eb31ae1 vcvttvs.f16.f32 s2, s3 +0+060 <[^>]*> 7eb31ae1 vcvttvc.f16.f32 s2, s3 +0+064 <[^>]*> 8eb31ae1 vcvtthi.f16.f32 s2, s3 +0+068 <[^>]*> 9eb31ae1 vcvttls.f16.f32 s2, s3 +0+06c <[^>]*> aeb31ae1 vcvttge.f16.f32 s2, s3 +0+070 <[^>]*> beb31ae1 vcvttlt.f16.f32 s2, s3 +0+074 <[^>]*> ceb31ae1 vcvttgt.f16.f32 s2, s3 +0+078 <[^>]*> deb31ae1 vcvttle.f16.f32 s2, s3 +0+07c <[^>]*> eeb31ae1 vcvtt.f16.f32 s2, s3 +0+080 <[^>]*> eeb20a60 vcvtb.f32.f16 s0, s1 +0+084 <[^>]*> 0eb21a61 vcvtbeq.f32.f16 s2, s3 +0+088 <[^>]*> 1eb21a61 vcvtbne.f32.f16 s2, s3 +0+08c <[^>]*> 2eb21a61 vcvtbcs.f32.f16 s2, s3 +0+090 
<[^>]*> 3eb21a61 vcvtbcc.f32.f16 s2, s3 +0+094 <[^>]*> 4eb21a61 vcvtbmi.f32.f16 s2, s3 +0+098 <[^>]*> 5eb21a61 vcvtbpl.f32.f16 s2, s3 +0+09c <[^>]*> 6eb21a61 vcvtbvs.f32.f16 s2, s3 +0+0a0 <[^>]*> 7eb21a61 vcvtbvc.f32.f16 s2, s3 +0+0a4 <[^>]*> 8eb21a61 vcvtbhi.f32.f16 s2, s3 +0+0a8 <[^>]*> 9eb21a61 vcvtbls.f32.f16 s2, s3 +0+0ac <[^>]*> aeb21a61 vcvtbge.f32.f16 s2, s3 +0+0b0 <[^>]*> beb21a61 vcvtblt.f32.f16 s2, s3 +0+0b4 <[^>]*> ceb21a61 vcvtbgt.f32.f16 s2, s3 +0+0b8 <[^>]*> deb21a61 vcvtble.f32.f16 s2, s3 +0+0bc <[^>]*> eeb21a61 vcvtb.f32.f16 s2, s3 +0+0c0 <[^>]*> eeb30a60 vcvtb.f16.f32 s0, s1 +0+0c4 <[^>]*> 0eb31a61 vcvtbeq.f16.f32 s2, s3 +0+0c8 <[^>]*> 1eb31a61 vcvtbne.f16.f32 s2, s3 +0+0cc <[^>]*> 2eb31a61 vcvtbcs.f16.f32 s2, s3 +0+0d0 <[^>]*> 3eb31a61 vcvtbcc.f16.f32 s2, s3 +0+0d4 <[^>]*> 4eb31a61 vcvtbmi.f16.f32 s2, s3 +0+0d8 <[^>]*> 5eb31a61 vcvtbpl.f16.f32 s2, s3 +0+0dc <[^>]*> 6eb31a61 vcvtbvs.f16.f32 s2, s3 +0+0e0 <[^>]*> 7eb31a61 vcvtbvc.f16.f32 s2, s3 +0+0e4 <[^>]*> 8eb31a61 vcvtbhi.f16.f32 s2, s3 +0+0e8 <[^>]*> 9eb31a61 vcvtbls.f16.f32 s2, s3 +0+0ec <[^>]*> aeb31a61 vcvtbge.f16.f32 s2, s3 +0+0f0 <[^>]*> beb31a61 vcvtblt.f16.f32 s2, s3 +0+0f4 <[^>]*> ceb31a61 vcvtbgt.f16.f32 s2, s3 +0+0f8 <[^>]*> deb31a61 vcvtble.f16.f32 s2, s3 +0+0fc <[^>]*> eeb31a61 vcvtb.f16.f32 s2, s3 diff --git a/gas/testsuite/gas/arm/half-prec-vfpv3.s b/gas/testsuite/gas/arm/half-prec-vfpv3.s new file mode 100644 index 0000000..acd1508 --- /dev/null +++ b/gas/testsuite/gas/arm/half-prec-vfpv3.s @@ -0,0 +1,68 @@ + .text + vcvtt.f32.f16 s0, s1 + vcvtteq.f32.f16 s2, s3 + vcvttne.f32.f16 s2, s3 + vcvttcs.f32.f16 s2, s3 + vcvttcc.f32.f16 s2, s3 + vcvttmi.f32.f16 s2, s3 + vcvttpl.f32.f16 s2, s3 + vcvttvs.f32.f16 s2, s3 + vcvttvc.f32.f16 s2, s3 + vcvtthi.f32.f16 s2, s3 + vcvttls.f32.f16 s2, s3 + vcvttge.f32.f16 s2, s3 + vcvttlt.f32.f16 s2, s3 + vcvttgt.f32.f16 s2, s3 + vcvttle.f32.f16 s2, s3 + vcvttal.f32.f16 s2, s3 + + vcvtt.f16.f32 s0, s1 + vcvtteq.f16.f32 s2, s3 + vcvttne.f16.f32 s2, s3 
+ vcvttcs.f16.f32 s2, s3 + vcvttcc.f16.f32 s2, s3 + vcvttmi.f16.f32 s2, s3 + vcvttpl.f16.f32 s2, s3 + vcvttvs.f16.f32 s2, s3 + vcvttvc.f16.f32 s2, s3 + vcvtthi.f16.f32 s2, s3 + vcvttls.f16.f32 s2, s3 + vcvttge.f16.f32 s2, s3 + vcvttlt.f16.f32 s2, s3 + vcvttgt.f16.f32 s2, s3 + vcvttle.f16.f32 s2, s3 + vcvttal.f16.f32 s2, s3 + + vcvtb.f32.f16 s0, s1 + vcvtbeq.f32.f16 s2, s3 + vcvtbne.f32.f16 s2, s3 + vcvtbcs.f32.f16 s2, s3 + vcvtbcc.f32.f16 s2, s3 + vcvtbmi.f32.f16 s2, s3 + vcvtbpl.f32.f16 s2, s3 + vcvtbvs.f32.f16 s2, s3 + vcvtbvc.f32.f16 s2, s3 + vcvtbhi.f32.f16 s2, s3 + vcvtbls.f32.f16 s2, s3 + vcvtbge.f32.f16 s2, s3 + vcvtblt.f32.f16 s2, s3 + vcvtbgt.f32.f16 s2, s3 + vcvtble.f32.f16 s2, s3 + vcvtbal.f32.f16 s2, s3 + + vcvtb.f16.f32 s0, s1 + vcvtbeq.f16.f32 s2, s3 + vcvtbne.f16.f32 s2, s3 + vcvtbcs.f16.f32 s2, s3 + vcvtbcc.f16.f32 s2, s3 + vcvtbmi.f16.f32 s2, s3 + vcvtbpl.f16.f32 s2, s3 + vcvtbvs.f16.f32 s2, s3 + vcvtbvc.f16.f32 s2, s3 + vcvtbhi.f16.f32 s2, s3 + vcvtbls.f16.f32 s2, s3 + vcvtbge.f16.f32 s2, s3 + vcvtblt.f16.f32 s2, s3 + vcvtbgt.f16.f32 s2, s3 + vcvtble.f16.f32 s2, s3 + vcvtbal.f16.f32 s2, s3 diff --git a/include/elf/ChangeLog b/include/elf/ChangeLog index 3cfc24f..c9aa86d 100644 --- a/include/elf/ChangeLog +++ b/include/elf/ChangeLog @@ -1,3 +1,7 @@ +2008-11-18 Catherine Moore + + * arm.h (Tag_ABI_FP_16bit_format): Define. + 2008-11-14 Nathan Sidwell * internal.h (struct elf_segment_map): Add header_size field. diff --git a/include/elf/arm.h b/include/elf/arm.h index af623f1..ade479c 100644 --- a/include/elf/arm.h +++ b/include/elf/arm.h @@ -272,6 +272,12 @@ enum Tag_ABI_optimization_goals, Tag_ABI_FP_optimization_goals, /* 32 is generic. 
*/ + Tag_undefined33 = 33, + Tag_CPU_unaligned_access, + Tag_undefined35, + Tag_VFP_HP_extension, + Tag_undefined37, + Tag_ABI_FP_16bit_format = 38, }; #endif diff --git a/include/opcode/ChangeLog b/include/opcode/ChangeLog index 33719b5..b64a8b6 100644 --- a/include/opcode/ChangeLog +++ b/include/opcode/ChangeLog @@ -1,3 +1,8 @@ +2008-11-18 Catherine Moore + + * arm.h (FPU_NEON_FP16): New. + (FPU_ARCH_NEON_FP16): New. + 2008-11-06 Chao-ying Fu * mips.h: Doucument '1' for 5-bit sync type. diff --git a/include/opcode/arm.h b/include/opcode/arm.h index 11cab3e..a639a8b 100644 --- a/include/opcode/arm.h +++ b/include/opcode/arm.h @@ -65,6 +65,7 @@ #define FPU_VFP_EXT_V3 0x01000000 /* VFPv3 insns. */ #define FPU_NEON_EXT_V1 0x00800000 /* Neon (SIMD) insns. */ #define FPU_VFP_EXT_D32 0x00400000 /* Registers D16-D31. */ +#define FPU_NEON_FP16 0x00200000 /* Half-precision extensions. */ /* Architectures are the sum of the base and extensions. The ARM ARM (rev E) defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T, @@ -139,6 +140,8 @@ #define FPU_ARCH_NEON_V1 ARM_FEATURE (0, FPU_NEON_EXT_V1) #define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \ ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1) +#define FPU_ARCH_NEON_FP16 \ + ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_NEON_FP16) #define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD) #define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE) diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index ee6bd10..2d8f214 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,11 @@ +2008-11-18 Catherine Moore + + * arm-dis.c (coprocessor_opcodes): Add half-precision vcvt + instructions. + (neon_opcodes): Likewise. + (print_insn_coprocessor): Print 't' or 'b' for vcvt + instructions. + 2008-11-14 Tristan Gingold * makefile.vms (OBJS): Update list of objects. 
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c index 155e495..1be7bbc 100644 --- a/opcodes/arm-dis.c +++ b/opcodes/arm-dis.c @@ -264,6 +264,9 @@ static const struct opcode32 coprocessor_opcodes[] = {FPU_NEON_EXT_V1, 0x0e100b30, 0x0f500f30, "vmov%c.%23?us16\t%12-15r, %16-19,7D[%6,21d]"}, {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"}, {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"}, + /* Half-precision conversion instructions. */ + {FPU_NEON_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"}, + {FPU_NEON_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"}, /* Floating point coprocessor (VFP) instructions */ {FPU_VFP_EXT_V1xD, 0x0ef1fa10, 0x0fffffff, "fmstat%c"}, @@ -504,6 +507,10 @@ static const struct opcode32 neon_opcodes[] = {FPU_NEON_EXT_V1, 0xf3b00800, 0xffb00c50, "vtbl%c.8\t%12-15,22D, %F, %0-3,5D"}, {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx%c.8\t%12-15,22D, %F, %0-3,5D"}, + /* Half-precision conversions. */ + {FPU_NEON_FP16, 0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"}, + {FPU_NEON_FP16, 0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"}, + /* Two registers, miscellaneous */ {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl%c.%24?us8\t%12-15,22Q, %0-3,5D"}, {FPU_NEON_EXT_V1, 0xf2900a10, 0xfebf0fd0, "vmovl%c.%24?us16\t%12-15,22Q, %0-3,5D"}, -- 2.7.4