[AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4.
author Eli Friedman <efriedma@quicinc.com>
Thu, 23 Jul 2020 00:06:47 +0000 (17:06 -0700)
committer Eli Friedman <efriedma@quicinc.com>
Thu, 23 Jul 2020 23:41:37 +0000 (16:41 -0700)
It's sort of tricky to hit this in practice, but not impossible. I have
a synthetic C testcase if anyone is interested.

The implementation is identical to the equivalent NEON register copies.

Differential Revision: https://reviews.llvm.org/D84373

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir [new file with mode: 0644]

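diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5139ae5..08f80c9 100644 (file)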
@@ -2744,6 +2744,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  // Copy a Z register pair by copying the individual sub-registers.
+  if (AArch64::ZPR2RegClass.contains(DestReg) &&
+      AArch64::ZPR2RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy a Z register triple by copying the individual sub-registers.
+  if (AArch64::ZPR3RegClass.contains(DestReg) &&
+      AArch64::ZPR3RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy a Z register quad by copying the individual sub-registers.
+  if (AArch64::ZPR4RegClass.contains(DestReg) &&
+      AArch64::ZPR4RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2, AArch64::zsub3};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
   if (AArch64::GPR64spRegClass.contains(DestReg) &&
       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
diff --git a/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
new file mode 100644 (file)
index 0000000..83a0b5d
--- /dev/null
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=postrapseudos -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            copy_zpr2
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1
+    ; CHECK-LABEL: name: copy_zpr2
+    ; CHECK: liveins: $z0_z1
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: RET_ReallyLR
+    $z1_z2 = COPY $z0_z1
+    $z0_z1 = COPY $z1_z2
+    RET_ReallyLR
+
+...
+---
+name:            copy_zpr3
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1_z2' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1_z2
+    ; CHECK-LABEL: name: copy_zpr3
+    ; CHECK: liveins: $z0_z1_z2
+    ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+    ; CHECK: RET_ReallyLR
+    $z1_z2_z3 = COPY $z0_z1_z2
+    $z0_z1_z2 = COPY $z1_z2_z3
+    RET_ReallyLR
+
+...
+---
+name:            copy_zpr4
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1_z2_z3' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1_z2_z3
+    ; CHECK-LABEL: name: copy_zpr4
+    ; CHECK: liveins: $z0_z1_z2_z3
+    ; CHECK: $z4 = ORR_ZZZ $z3, $z3
+    ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+    ; CHECK: $z3 = ORR_ZZZ $z4, $z4
+    ; CHECK: RET_ReallyLR
+    $z1_z2_z3_z4 = COPY $z0_z1_z2_z3
+    $z0_z1_z2_z3 = COPY $z1_z2_z3_z4
+    RET_ReallyLR
+
+...