From 15ce82722d0e63cdcc8a7af734c74e44f7ce813c Mon Sep 17 00:00:00 2001 From: "sigurds@chromium.org" Date: Thu, 30 Oct 2014 11:00:47 +0000 Subject: [PATCH] Add vrint{a,n,p,m,z} instructions to arm assembler. These instructions are only available on ARMv8. R=rodolph.perfetta@gmail.com, ulan@chromium.org, bmeurer@chromium.org, rodolph.perfetta@arm.com Review URL: https://codereview.chromium.org/682643002 Cr-Commit-Position: refs/heads/master@{#25013} git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@25013 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/assembler-arm.cc | 70 ++++++++++++++++++++++++++++ src/arm/assembler-arm.h | 8 ++++ src/arm/constants-arm.h | 52 ++++++++++----------- src/arm/disasm-arm.cc | 52 +++++++++++++++++++++ src/arm/simulator-arm.cc | 50 ++++++++++++++++++++ test/cctest/test-assembler-arm.cc | 96 +++++++++++++++++++++++++++++++++++++++ test/cctest/test-disasm-arm.cc | 24 ++++++++++ 7 files changed, 326 insertions(+), 26 deletions(-) diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 90018ea..17bf4f9 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -3094,6 +3094,76 @@ void Assembler::vsqrt(const DwVfpRegister dst, } +void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) { + // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) | + // 10(19-18) | RM=00(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) | + // M(5) | 0(4) | Vm(3-0) + DCHECK(CpuFeatures::IsSupported(ARMv8)); + int vd, d; + dst.split_code(&vd, &d); + int vm, m; + src.split_code(&vm, &m); + emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 | + 0x5 * B9 | B8 | B6 | m * B5 | vm); +} + + +void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) { + // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) | + // 10(19-18) | RM=01(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) | + // M(5) | 0(4) | Vm(3-0) + 
DCHECK(CpuFeatures::IsSupported(ARMv8)); + int vd, d; + dst.split_code(&vd, &d); + int vm, m; + src.split_code(&vm, &m); + emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 | + vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm); +} + + +void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) { + // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) | + // 10(19-18) | RM=10(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) | + // M(5) | 0(4) | Vm(3-0) + DCHECK(CpuFeatures::IsSupported(ARMv8)); + int vd, d; + dst.split_code(&vd, &d); + int vm, m; + src.split_code(&vm, &m); + emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 | + vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm); +} + + +void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) { + // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) | + // 10(19-18) | RM=11(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) | + // M(5) | 0(4) | Vm(3-0) + DCHECK(CpuFeatures::IsSupported(ARMv8)); + int vd, d; + dst.split_code(&vd, &d); + int vm, m; + src.split_code(&vm, &m); + emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 | + vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm); +} + + +void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src, + const Condition cond) { + // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) | + // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0) + DCHECK(CpuFeatures::IsSupported(ARMv8)); + int vd, d; + dst.split_code(&vd, &d); + int vm, m; + src.split_code(&vm, &m); + emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 | + 0x5 * B9 | B8 | B7 | B6 | m * B5 | vm); +} + + // Support for NEON. 
void Assembler::vld1(NeonSize size, diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index f78cc50..9087fab 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -1279,6 +1279,14 @@ class Assembler : public AssemblerBase { const DwVfpRegister src, const Condition cond = al); + // ARMv8 rounding instructions. + void vrinta(const DwVfpRegister dst, const DwVfpRegister src); + void vrintn(const DwVfpRegister dst, const DwVfpRegister src); + void vrintm(const DwVfpRegister dst, const DwVfpRegister src); + void vrintp(const DwVfpRegister dst, const DwVfpRegister src); + void vrintz(const DwVfpRegister dst, const DwVfpRegister src, + const Condition cond = al); + // Support for NEON. // All these APIs support D0 to D31 and Q0 to Q15. diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h index 375ef89..2a293b3 100644 --- a/src/arm/constants-arm.h +++ b/src/arm/constants-arm.h @@ -161,26 +161,26 @@ enum MiscInstructionsBits74 { // Instruction encoding bits and masks. enum { - H = 1 << 5, // Halfword (or byte). - S6 = 1 << 6, // Signed (or unsigned). - L = 1 << 20, // Load (or store). - S = 1 << 20, // Set condition code (or leave unchanged). - W = 1 << 21, // Writeback base register (or leave unchanged). - A = 1 << 21, // Accumulate in multiply instruction (or not). - B = 1 << 22, // Unsigned byte (or word). - N = 1 << 22, // Long (or short). - U = 1 << 23, // Positive (or negative) offset/index. - P = 1 << 24, // Offset/pre-indexed addressing (or post-indexed addressing). - I = 1 << 25, // Immediate shifter operand (or not). - - B4 = 1 << 4, - B5 = 1 << 5, - B6 = 1 << 6, - B7 = 1 << 7, - B8 = 1 << 8, - B9 = 1 << 9, + H = 1 << 5, // Halfword (or byte). + S6 = 1 << 6, // Signed (or unsigned). + L = 1 << 20, // Load (or store). + S = 1 << 20, // Set condition code (or leave unchanged). + W = 1 << 21, // Writeback base register (or leave unchanged). + A = 1 << 21, // Accumulate in multiply instruction (or not). 
+ B = 1 << 22, // Unsigned byte (or word). + N = 1 << 22, // Long (or short). + U = 1 << 23, // Positive (or negative) offset/index. + P = 1 << 24, // Offset/pre-indexed addressing (or post-indexed addressing). + I = 1 << 25, // Immediate shifter operand (or not). + B4 = 1 << 4, + B5 = 1 << 5, + B6 = 1 << 6, + B7 = 1 << 7, + B8 = 1 << 8, + B9 = 1 << 9, B12 = 1 << 12, B16 = 1 << 16, + B17 = 1 << 17, B18 = 1 << 18, B19 = 1 << 19, B20 = 1 << 20, @@ -194,16 +194,16 @@ enum { B28 = 1 << 28, // Instruction bit masks. - kCondMask = 15 << 28, - kALUMask = 0x6f << 21, - kRdMask = 15 << 12, // In str instruction. + kCondMask = 15 << 28, + kALUMask = 0x6f << 21, + kRdMask = 15 << 12, // In str instruction. kCoprocessorMask = 15 << 8, kOpCodeMask = 15 << 21, // In data-processing instructions. - kImm24Mask = (1 << 24) - 1, - kImm16Mask = (1 << 16) - 1, - kImm8Mask = (1 << 8) - 1, - kOff12Mask = (1 << 12) - 1, - kOff8Mask = (1 << 8) - 1 + kImm24Mask = (1 << 24) - 1, + kImm16Mask = (1 << 16) - 1, + kImm8Mask = (1 << 8) - 1, + kOff12Mask = (1 << 12) - 1, + kOff8Mask = (1 << 8) - 1 }; diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index 2f3a9c7..dc26018 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -1277,6 +1277,14 @@ void Decoder::DecodeTypeVFP(Instruction* instr) { } else { Unknown(instr); // Not used by V8. } + } else if (((instr->Opc2Value() == 0x6)) && instr->Opc3Value() == 0x3) { + bool dp_operation = (instr->SzValue() == 1); + // vrintz - round towards zero (truncate) + if (dp_operation) { + Format(instr, "vrintz'cond.f64.f64 'Dd, 'Dm"); + } else { + Unknown(instr); // Not used by V8. + } } else { Unknown(instr); // Not used by V8. 
} @@ -1627,6 +1635,50 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { Unknown(instr); } break; + case 0x1D: + if (instr->Opc1Value() == 0x7 && instr->Bits(19, 18) == 0x2 && + instr->Bits(11, 9) == 0x5 && instr->Bits(7, 6) == 0x1 && + instr->Bit(4) == 0x0) { + // VRINTA, VRINTN, VRINTP, VRINTM (floating-point) + bool dp_operation = (instr->SzValue() == 1); + int rounding_mode = instr->Bits(17, 16); + switch (rounding_mode) { + case 0x0: + if (dp_operation) { + Format(instr, "vrinta.f64.f64 'Dd, 'Dm"); + } else { + Unknown(instr); + } + break; + case 0x1: + if (dp_operation) { + Format(instr, "vrintn.f64.f64 'Dd, 'Dm"); + } else { + Unknown(instr); + } + break; + case 0x2: + if (dp_operation) { + Format(instr, "vrintp.f64.f64 'Dd, 'Dm"); + } else { + Unknown(instr); + } + break; + case 0x3: + if (dp_operation) { + Format(instr, "vrintm.f64.f64 'Dd, 'Dm"); + } else { + Unknown(instr); + } + break; + default: + UNREACHABLE(); // Case analysis is exhaustive. + break; + } + } else { + Unknown(instr); + } + break; default: Unknown(instr); break; diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index ceabe78..aeb35c8 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -2957,6 +2957,12 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { } else { UNREACHABLE(); // Not used by v8. } + } else if (((instr->Opc2Value() == 0x6)) && (instr->Opc3Value() == 0x3)) { + // vrintz - truncate + double dm_value = get_double_from_d_register(vm); + double dd_value = std::trunc(dm_value); + dd_value = canonicalizeNaN(dd_value); + set_d_register_from_double(vd, dd_value); } else { UNREACHABLE(); // Not used by V8. 
} @@ -3607,6 +3613,50 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { UNIMPLEMENTED(); } break; + case 0x1D: + if (instr->Opc1Value() == 0x7 && instr->Opc3Value() == 0x1 && + instr->Bits(11, 9) == 0x5 && instr->Bits(19, 18) == 0x2 && + instr->Bit(8) == 0x1) { + int vm = instr->VFPMRegValue(kDoublePrecision); + int vd = instr->VFPDRegValue(kDoublePrecision); + double dm_value = get_double_from_d_register(vm); + double dd_value = 0.0; + int rounding_mode = instr->Bits(17, 16); + switch (rounding_mode) { + case 0x0: // vrinta - round with ties to away from zero + dd_value = std::round(dm_value); + break; + case 0x1: { // vrintn - round with ties to even + dd_value = std::floor(dm_value); + double error = dm_value - dd_value; + // Take care of correctly handling the range [-0.5, -0.0], which + // must yield -0.0. + if ((-0.5 <= dm_value) && (dm_value < 0.0)) { + dd_value = -0.0; + // If the error is greater than 0.5, or is equal to 0.5 and the + // integer result is odd, round up. + } else if ((error > 0.5) || + ((error == 0.5) && (fmod(dd_value, 2) != 0))) { + dd_value++; + } + break; + } + case 0x2: // vrintp - ceil + dd_value = std::ceil(dm_value); + break; + case 0x3: // vrintm - floor + dd_value = std::floor(dm_value); + break; + default: + UNREACHABLE(); // Case analysis is exhaustive. + break; + } + dd_value = canonicalizeNaN(dd_value); + set_d_register_from_double(vd, dd_value); + } else { + UNIMPLEMENTED(); + } + break; default: UNIMPLEMENTED(); break; diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index 9079d5a..b6e260e 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -1676,4 +1676,100 @@ TEST(code_relative_offset) { CHECK_EQ(42, res); } + +TEST(ARMv8_vrintX) { + // Test the vrintX floating point instructions. 
+ CcTest::InitializeVM(); + Isolate* isolate = CcTest::i_isolate(); + HandleScope scope(isolate); + + typedef struct { + double input; + double ar; + double nr; + double mr; + double pr; + double zr; + } T; + T t; + + // Create a function that accepts &t, and loads, manipulates, and stores + // the doubles and floats. + Assembler assm(isolate, NULL, 0); + Label L, C; + + + if (CpuFeatures::IsSupported(ARMv8)) { + CpuFeatureScope scope(&assm, ARMv8); + + __ mov(ip, Operand(sp)); + __ stm(db_w, sp, r4.bit() | fp.bit() | lr.bit()); + + __ mov(r4, Operand(r0)); + + // Test vrinta + __ vldr(d6, r4, OFFSET_OF(T, input)); + __ vrinta(d5, d6); + __ vstr(d5, r4, OFFSET_OF(T, ar)); + + // Test vrintn + __ vldr(d6, r4, OFFSET_OF(T, input)); + __ vrintn(d5, d6); + __ vstr(d5, r4, OFFSET_OF(T, nr)); + + // Test vrintp + __ vldr(d6, r4, OFFSET_OF(T, input)); + __ vrintp(d5, d6); + __ vstr(d5, r4, OFFSET_OF(T, pr)); + + // Test vrintm + __ vldr(d6, r4, OFFSET_OF(T, input)); + __ vrintm(d5, d6); + __ vstr(d5, r4, OFFSET_OF(T, mr)); + + // Test vrintz + __ vldr(d6, r4, OFFSET_OF(T, input)); + __ vrintz(d5, d6); + __ vstr(d5, r4, OFFSET_OF(T, zr)); + + __ ldm(ia_w, sp, r4.bit() | fp.bit() | pc.bit()); + + CodeDesc desc; + assm.GetCode(&desc); + Handle<Code> code = isolate->factory()->NewCode( + desc, Code::ComputeFlags(Code::STUB), Handle<Code>()); +#ifdef DEBUG + OFStream os(stdout); + code->Print(os); +#endif + F3 f = FUNCTION_CAST<F3>(code->entry()); + + Object* dummy = nullptr; + USE(dummy); + +#define CHECK_VRINT(input_val, ares, nres, mres, pres, zres) \ + t.input = input_val; \ + dummy = CALL_GENERATED_CODE(f, &t, 0, 0, 0, 0); \ + CHECK_EQ(ares, t.ar); \ + CHECK_EQ(nres, t.nr); \ + CHECK_EQ(mres, t.mr); \ + CHECK_EQ(pres, t.pr); \ + CHECK_EQ(zres, t.zr); + + CHECK_VRINT(-0.5, -1.0, -0.0, -1.0, -0.0, -0.0) + CHECK_VRINT(-0.6, -1.0, -1.0, -1.0, -0.0, -0.0) + CHECK_VRINT(-1.1, -1.0, -1.0, -2.0, -1.0, -1.0) + CHECK_VRINT(0.5, 1.0, 0.0, 0.0, 1.0, 0.0) + CHECK_VRINT(0.6, 1.0, 1.0, 0.0, 1.0, 0.0) + 
CHECK_VRINT(1.1, 1.0, 1.0, 1.0, 2.0, 1.0) + double inf = std::numeric_limits<double>::infinity(); + CHECK_VRINT(inf, inf, inf, inf, inf, inf) + CHECK_VRINT(-inf, -inf, -inf, -inf, -inf, -inf) + CHECK_VRINT(-0.0, -0.0, -0.0, -0.0, -0.0, -0.0) + double nan = std::numeric_limits<double>::quiet_NaN(); + CHECK_VRINT(nan, nan, nan, nan, nan, nan) + +#undef CHECK_VRINT + } +} #undef __ diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc index 1fabdc2..39356b1 100644 --- a/test/cctest/test-disasm-arm.cc +++ b/test/cctest/test-disasm-arm.cc @@ -686,6 +686,30 @@ TEST(Vfp) { } +TEST(ARMv8_vrintX_disasm) { + SET_UP(); + + if (CpuFeatures::IsSupported(ARMv8)) { + COMPARE(vrinta(d0, d0), "feb80b40 vrinta.f64.f64 d0, d0"); + COMPARE(vrinta(d2, d3), "feb82b43 vrinta.f64.f64 d2, d3"); + + COMPARE(vrintp(d0, d0), "feba0b40 vrintp.f64.f64 d0, d0"); + COMPARE(vrintp(d2, d3), "feba2b43 vrintp.f64.f64 d2, d3"); + + COMPARE(vrintn(d0, d0), "feb90b40 vrintn.f64.f64 d0, d0"); + COMPARE(vrintn(d2, d3), "feb92b43 vrintn.f64.f64 d2, d3"); + + COMPARE(vrintm(d0, d0), "febb0b40 vrintm.f64.f64 d0, d0"); + COMPARE(vrintm(d2, d3), "febb2b43 vrintm.f64.f64 d2, d3"); + + COMPARE(vrintz(d0, d0), "eeb60bc0 vrintz.f64.f64 d0, d0"); + COMPARE(vrintz(d2, d3, ne), "1eb62bc3 vrintzne.f64.f64 d2, d3"); + } + + VERIFY_RUN(); +} + + TEST(Neon) { SET_UP(); -- 2.7.4