[ARM64] [turbofan] Improve construction of doubles.
author jacob.bramley <jacob.bramley@arm.com>
Mon, 16 Mar 2015 17:15:16 +0000 (10:15 -0700)
committer Commit bot <commit-bot@chromium.org>
Mon, 16 Mar 2015 17:15:28 +0000 (17:15 +0000)
Improve the code generated for construction of a 64-bit floating point
number from two 32-bit integers.

Previously, each half was inserted separately: move FP->core, insert the
32-bit word, then move core->FP. Now, we construct the whole double in an X
register (using BFI) and then do a single core->FP move. Typically, the
temporary register aliases the input register, so the sequence improves
from six to two instructions.

Patch from Martyn Capewell <m.m.capewell@googlemail.com>.

BUG=

Review URL: https://codereview.chromium.org/1008003004

Cr-Commit-Position: refs/heads/master@{#27227}

src/compiler/arm64/code-generator-arm64.cc
src/compiler/arm64/instruction-codes-arm64.h
src/compiler/arm64/instruction-selector-arm64.cc

index 349950fee2635df529250867e9416b8f041b5b9a..fa821f58ca0975ee509c6ca0fedc5a65db08d5b9 100644 (file)
@@ -588,6 +588,10 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       __ Ubfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt8(1),
               i.InputInt8(2));
       break;
+    case kArm64Bfi:
+      __ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
+             i.InputInt6(3));
+      break;
     case kArm64TestAndBranch32:
     case kArm64TestAndBranch:
       // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
@@ -697,10 +701,12 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       __ Fmov(i.OutputRegister32(), i.InputFloat32Register(0));
       break;
     case kArm64Float64ExtractHighWord32:
+      // TODO(arm64): This should use MOV (to general) when NEON is supported.
       __ Fmov(i.OutputRegister(), i.InputFloat64Register(0));
       __ Lsr(i.OutputRegister(), i.OutputRegister(), 32);
       break;
     case kArm64Float64InsertLowWord32: {
+      // TODO(arm64): This should use MOV (from general) when NEON is supported.
       UseScratchRegisterScope scope(masm());
       Register tmp = scope.AcquireX();
       __ Fmov(tmp, i.InputFloat64Register(0));
@@ -709,6 +715,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       break;
     }
     case kArm64Float64InsertHighWord32: {
+      // TODO(arm64): This should use MOV (from general) when NEON is supported.
       UseScratchRegisterScope scope(masm());
       Register tmp = scope.AcquireX();
       __ Fmov(tmp.W(), i.InputFloat32Register(0));
@@ -716,6 +723,10 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       __ Fmov(i.OutputFloat64Register(), tmp);
       break;
     }
+    case kArm64Float64MoveU64: {
+      __ Fmov(i.OutputFloat64Register(), i.InputRegister(0));
+      break;
+    }
     case kArm64Ldrb:
       __ Ldrb(i.OutputRegister(), i.MemoryOperand());
       break;
index 0218ada2b6ad0b4230510cfe0843755fbe5477f8..242d25d0093440dbdfd77c8b4b53e26ddec610d9 100644 (file)
@@ -70,6 +70,7 @@ namespace compiler {
   V(Arm64Sxtw)                     \
   V(Arm64Ubfx)                     \
   V(Arm64Ubfx32)                   \
+  V(Arm64Bfi)                      \
   V(Arm64TestAndBranch32)          \
   V(Arm64TestAndBranch)            \
   V(Arm64CompareAndBranch32)       \
@@ -98,6 +99,7 @@ namespace compiler {
   V(Arm64Float64ExtractHighWord32) \
   V(Arm64Float64InsertLowWord32)   \
   V(Arm64Float64InsertHighWord32)  \
+  V(Arm64Float64MoveU64)           \
   V(Arm64LdrS)                     \
   V(Arm64StrS)                     \
   V(Arm64LdrD)                     \
index 7f8e9ea3bdd5d17522e0e72c1cf68ac468629053..fedda248d8272e43da6037ae772471d5b34e851f 100644 (file)
@@ -1606,20 +1606,35 @@ void InstructionSelector::VisitFloat64ExtractHighWord32(Node* node) {
 
 
 void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
-  // TODO(arm64): Some AArch64 specialist should be able to improve this.
   Arm64OperandGenerator g(this);
   Node* left = node->InputAt(0);
   Node* right = node->InputAt(1);
+  if (left->opcode() == IrOpcode::kFloat64InsertHighWord32 &&
+      CanCover(node, left)) {
+    Node* right_of_left = left->InputAt(1);
+    Emit(kArm64Bfi, g.DefineSameAsFirst(right), g.UseRegister(right),
+         g.UseRegister(right_of_left), g.TempImmediate(32),
+         g.TempImmediate(32));
+    Emit(kArm64Float64MoveU64, g.DefineAsRegister(node), g.UseRegister(right));
+    return;
+  }
   Emit(kArm64Float64InsertLowWord32, g.DefineAsRegister(node),
        g.UseRegister(left), g.UseRegister(right));
 }
 
 
 void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
-  // TODO(arm64): Some AArch64 specialist should be able to improve this.
   Arm64OperandGenerator g(this);
   Node* left = node->InputAt(0);
   Node* right = node->InputAt(1);
+  if (left->opcode() == IrOpcode::kFloat64InsertLowWord32 &&
+      CanCover(node, left)) {
+    Node* right_of_left = left->InputAt(1);
+    Emit(kArm64Bfi, g.DefineSameAsFirst(left), g.UseRegister(right_of_left),
+         g.UseRegister(right), g.TempImmediate(32), g.TempImmediate(32));
+    Emit(kArm64Float64MoveU64, g.DefineAsRegister(node), g.UseRegister(left));
+    return;
+  }
   Emit(kArm64Float64InsertHighWord32, g.DefineAsRegister(node),
        g.UseRegister(left), g.UseRegister(right));
 }