From b61696dd952ef9a702a23895e6aae31547df5567 Mon Sep 17 00:00:00 2001 From: "rmcilroy@chromium.org" Date: Fri, 11 Apr 2014 09:22:14 +0000 Subject: [PATCH] ARM: Do not set FPSCR when converting to clamped uint8 Setting the FPSCR flags is expensive on some CPUs. Get rid of repeated setting of the FPSCR by relying on the correct default flags being set when doing uint8 clamping. Also use vcvt_u32_f64 instead of vcvt_s32_f64, which enables removing the check against zero (vcvt_u32_f64 will clamp to zero). To be on the safe side, add asserts to check that the VFP rounding mode flags are set to default as expected. This increases performance of a hot loop repeatedly setting Uint8ClampedArray values on some CPUs by as much as a factor of 12. BUG=v8:3253 LOG=N R=jacob.bramley@arm.com, rmcilroy@chromium.org, ulan@chromium.org Review URL: https://codereview.chromium.org/230473005 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@20676 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/macro-assembler-arm.cc | 37 ++++++++++++------------------------- src/objects.h | 1 + 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc index 1633adc..6911ccf 100644 --- a/src/arm/macro-assembler-arm.cc +++ b/src/arm/macro-assembler-arm.cc @@ -796,6 +796,10 @@ void MacroAssembler::VFPEnsureFPSCRState(Register scratch) { // If needed, restore wanted bits of FPSCR. Label fpscr_done; vmrs(scratch); + if (emit_debug_code()) { + tst(scratch, Operand(kVFPRoundingModeMask)); + Assert(eq, kDefaultRoundingModeNotSet); + } tst(scratch, Operand(kVFPDefaultNaNModeControlBit)); b(ne, &fpscr_done); orr(scratch, scratch, Operand(kVFPDefaultNaNModeControlBit)); @@ -3800,36 +3804,19 @@ void MacroAssembler::ClampUint8(Register output_reg, Register input_reg) { void MacroAssembler::ClampDoubleToUint8(Register result_reg, DwVfpRegister input_reg, LowDwVfpRegister double_scratch) { - Label above_zero; Label done; - Label in_bounds; - - VFPCompareAndSetFlags(input_reg, 0.0); - b(gt, &above_zero); - - // Double value is less than zero, NaN or Inf, return 0. - mov(result_reg, Operand::Zero()); - b(al, &done); - // Double value is >= 255, return 255. - bind(&above_zero); + // Handle inputs >= 255 (including +infinity). Vmov(double_scratch, 255.0, result_reg); - VFPCompareAndSetFlags(input_reg, double_scratch); - b(le, &in_bounds); mov(result_reg, Operand(255)); - b(al, &done); - - // In 0-255 range, round and truncate. - bind(&in_bounds); - // Save FPSCR. - vmrs(ip); - // Set rounding mode to round to the nearest integer by clearing bits[23:22]. - bic(result_reg, ip, Operand(kVFPRoundingModeMask)); - vmsr(result_reg); - vcvt_s32_f64(double_scratch.low(), input_reg, kFPSCRRounding); + VFPCompareAndSetFlags(input_reg, double_scratch); + b(ge, &done); + + // For inputs < 255 (including negative) vcvt_u32_f64 with round-to-nearest + // rounding mode will provide the correct result. + vcvt_u32_f64(double_scratch.low(), input_reg, kFPSCRRounding); vmov(result_reg, double_scratch.low()); - // Restore FPSCR. - vmsr(ip); + bind(&done); } diff --git a/src/objects.h b/src/objects.h index bb6b426..5769f7e 100644 --- a/src/objects.h +++ b/src/objects.h @@ -1127,6 +1127,7 @@ class MaybeObject BASE_EMBEDDED { V(kDeclarationInCatchContext, "Declaration in catch context") \ V(kDeclarationInWithContext, "Declaration in with context") \ V(kDefaultNaNModeNotSet, "Default NaN mode not set") \ + V(kDefaultRoundingModeNotSet, "Default rounding mode not set") \ V(kDeleteWithGlobalVariable, "Delete with global variable") \ V(kDeleteWithNonGlobalVariable, "Delete with non-global variable") \ V(kDestinationOfCopyNotAligned, "Destination of copy not aligned") \ -- 2.7.4