From db211892edde7dd6fafd108ecf770a2fecf8651d Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Tue, 20 Feb 2018 05:11:57 +0000
Subject: [PATCH] [AArch64][GlobalISel] When copying from a gpr32 to an fpr16
 reg, convert to fpr32 first.

This is a follow on commit to r[x] where we fix the other direction of copy.
For this case, after converting the source from gpr32 -> fpr32, we use a
subregister copy, which is essentially what EXTRACT_SUBREG does in SDAG land.

https://reviews.llvm.org/D43444

llvm-svn: 325550
---
 .../Target/AArch64/AArch64InstructionSelector.cpp  | 35 +++++++++--
 .../CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir   | 72 +++++++++++++++++++---
 .../AArch64/GlobalISel/select-insert-extract.mir   |  8 ++-
 3 files changed, 102 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 1d0cdc8..309f612 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -317,12 +317,39 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
   return GenericOpc;
 }
 
+static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
+                                    MachineRegisterInfo &MRI, unsigned SrcReg) {
+  // Copies from gpr32 to fpr16 need to use a sub-register copy.
+  unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY))
+      .addDef(CopyReg)
+      .addUse(SrcReg);
+  unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+      .addDef(SubRegCopy)
+      .addUse(CopyReg, 0, AArch64::hsub);
+
+  MachineOperand &RegOp = I.getOperand(1);
+  RegOp.setReg(SubRegCopy);
+  return true;
+}
+
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
 
   unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+
   if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
+        !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+      const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
+          MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
+      if (SrcRC == &AArch64::GPR32allRegClass)
+        return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+    }
     assert(I.isCopy() && "Generic operators do not allow physical registers");
     return true;
   }
@@ -330,7 +357,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
   (void)DstSize;
-  unsigned SrcReg = I.getOperand(1).getReg();
   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
   (void)SrcSize;
   assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
@@ -357,9 +383,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   }
 
   if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
-    const RegClassOrRegBank &RegClassOrBank =
-      MRI.getRegClassOrRegBank(SrcReg);
-
+    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
     const TargetRegisterClass *SrcRC =
         RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
     const RegisterBank *RB = nullptr;
@@ -378,6 +402,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
           .addImm(AArch64::hsub);
       MachineOperand &RegOp = I.getOperand(1);
       RegOp.setReg(PromoteReg);
+    } else if (RC == &AArch64::FPR16RegClass &&
+               SrcRC == &AArch64::GPR32allRegClass) {
+      selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
index 3a718cf..76e819a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
@@ -6,18 +6,21 @@
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64-arm-none-eabi"
 
-  %struct.struct2 = type { [2 x half] }
+  ; Function Attrs: noinline nounwind optnone
+  define void @fp16_to_gpr([2 x half], [2 x half]* %addr) {
+    ret void
+  }
 
-  @global_arg0 = common dso_local global %struct.struct2 zeroinitializer, align 2
+  define void @gpr_to_fp16() {
+    ret void
+  }
 
-  ; Function Attrs: noinline nounwind optnone
-  define dso_local void @c_test([2 x half], [2 x half]* %addr) {
-    store [2 x half] %0, [2 x half]* %addr, align 2
+  define void @gpr_to_fp16_physreg() {
     ret void
   }
 ...
 ---
-name:            c_test
+name:            fp16_to_gpr
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -40,7 +43,7 @@ body:             |
   bb.1 (%ir-block.1):
     liveins: $h0, $h1, $x0
 
-    ; CHECK-LABEL: name: c_test
+    ; CHECK-LABEL: name: fp16_to_gpr
     ; CHECK: liveins: $h0, $h1, $x0
     ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0
     ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
@@ -67,3 +70,58 @@ body:             |
     RET_ReallyLR
 
 ...
+
+---
+name:            gpr_to_fp16
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr }
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: gpr_to_fp16
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+    ; CHECK: $h0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s16) = G_TRUNC %0(s32)
+    %2:fpr(s16) = COPY %1(s16)
+    $h0 = COPY %2(s16)
+    RET_ReallyLR implicit $h0
+
+...
+---
+name:            gpr_to_fp16_physreg
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: gpr_to_fp16_physreg
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+    ; CHECK: $h0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s16) = G_TRUNC %0(s32)
+    $h0 = COPY %1(s16)
+    RET_ReallyLR implicit $h0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
index dc22e72..deddea6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
@@ -97,8 +97,12 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
     ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15
     ; CHECK: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 15, 30
-    ; CHECK: $h0 = COPY [[UBFMWri]]
-    ; CHECK: $h1 = COPY [[UBFMWri1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[UBFMWri]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+    ; CHECK: $h0 = COPY [[COPY2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]].hsub
+    ; CHECK: $h1 = COPY [[COPY4]]
     %0:gpr(s32) = COPY $w0
 
     %1:gpr(s16) = G_EXTRACT %0, 0
-- 
2.7.4