[AArch64][GlobalISel] Regbankselect + select @llvm.aarch64.neon.uaddlv
authorJessica Paquette <jpaquette@apple.com>
Tue, 13 Apr 2021 17:03:17 +0000 (10:03 -0700)
committerJessica Paquette <jpaquette@apple.com>
Mon, 19 Apr 2021 17:47:49 +0000 (10:47 -0700)
It turns out we already import a bunch of selection code for intrinsics. The
imported code checks that the register banks on the G_INTRINSIC instruction
are correct and, if so, goes ahead and selects it.

This adds code to AArch64RegisterBankInfo to allow us to correctly determine
the register banks on intrinsics that have known register bank constraints.

For now, this only handles @llvm.aarch64.neon.uaddlv. This is necessary for
porting AArch64TargetLowering::LowerCTPOP.
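
For context, AArch64's CTPOP lowering counts bits per byte with CNT and then
sums the lanes with uaddlv. A ported GlobalISel version would build the
intrinsic through MachineIRBuilder; a minimal sketch (hypothetical: the names
MIB and CntReg are assumed, and the port itself is not part of this patch):

    // Hypothetical: sum the per-byte counts in CntReg (an <8 x s8> value,
    // e.g. produced by a vector G_CTPOP) into a single s32 result.
    auto Sum = MIB.buildIntrinsic(Intrinsic::aarch64_neon_uaddlv,
                                  {LLT::scalar(32)},
                                  /*HasSideEffects=*/false)
                   .addUse(CntReg);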

Also add a utility for getting the intrinsic ID from a G_INTRINSIC instruction.
This seems a little nicer than having to know how intrinsic instructions
are structured.
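
For example, a caller can now match a specific intrinsic without knowing which
operand holds the ID. A small sketch, mirroring the RegisterBankInfo change
below:

    // Recognize the uaddlv intrinsic on a generic instruction.
    if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
        getIntrinsicID(MI) == Intrinsic::aarch64_neon_uaddlv) {
      // ... this instruction only uses and defines FPRs ...
    }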

Differential Revision: https://reviews.llvm.org/D100398

llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir [new file with mode: 0644]
llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir [new file with mode: 0644]

index 19a5589..1a592b6 100644 (file)
@@ -380,5 +380,10 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
 /// Returns true if the given block should be optimized for size.
 bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
                       BlockFrequencyInfo *BFI);
+
+/// \returns the intrinsic ID for a G_INTRINSIC or G_INTRINSIC_W_SIDE_EFFECTS
+/// instruction \p MI.
+unsigned getIntrinsicID(const MachineInstr &MI);
+
 } // End namespace llvm.
 #endif
index 68f51c3..0b89ae4 100644 (file)
@@ -989,3 +989,12 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
   return F.hasOptSize() || F.hasMinSize() ||
          llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
 }
+
+unsigned llvm::getIntrinsicID(const MachineInstr &MI) {
+#ifndef NDEBUG
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_INTRINSIC ||
+         Opc == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+#endif
+  return MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
+}
index 7410c76..8b488ea 100644 (file)
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -25,6 +26,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
@@ -466,11 +468,24 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
                                getValueMapping(RBIdx, Size), NumOperands);
 }
 
+/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
+static bool isFPIntrinsic(unsigned ID) {
+  // TODO: Add more intrinsics.
+  switch (ID) {
+  default:
+    return false;
+  case Intrinsic::aarch64_neon_uaddlv:
+    return true;
+  }
+}
+
 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                                const MachineRegisterInfo &MRI,
                                                const TargetRegisterInfo &TRI,
                                                unsigned Depth) const {
   unsigned Op = MI.getOpcode();
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(getIntrinsicID(MI)))
+    return true;
 
   // Do we have an explicit floating point instruction?
   if (isPreISelGenericFloatingPointOpcode(Op))
@@ -915,6 +930,20 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // Assign them FPR for now.
     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
     break;
+  case TargetOpcode::G_INTRINSIC: {
+    // Check if we know that the intrinsic has any constraints on its register
+    // banks. If it does, then update the mapping accordingly.
+    unsigned ID = getIntrinsicID(MI);
+    unsigned Idx = 0;
+    if (!isFPIntrinsic(ID))
+      break;
+    for (const auto &Op : MI.explicit_operands()) {
+      if (Op.isReg())
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+      ++Idx;
+    }
+    break;
+  }
   }
 
   // Finally construct the computed mapping.
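
Per the TODO above, teaching this hook about another all-FPR intrinsic should
only require a new case in isFPIntrinsic, along these lines (hypothetical,
not part of this patch):

    /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
    static bool isFPIntrinsic(unsigned ID) {
      switch (ID) {
      default:
        return false;
      case Intrinsic::aarch64_neon_uaddlv:
      case Intrinsic::aarch64_neon_saddlv: // hypothetical future addition
        return true;
      }
    }
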
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
new file mode 100644 (file)
index 0000000..7e89c99
--- /dev/null
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify register banks for intrinsics with known constraints (e.g. all
+# operands must be FPRs).
+#
+
+...
+---
+name:            uaddlv_fpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_fpr
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr(<16 x s8>) = COPY $q0
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:_(<16 x s8>) = COPY $q0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_fpr_load
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: uaddlv_fpr_load
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %ptr:_(p0) = COPY $x0
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (load 8)
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_fpr_store
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: uaddlv_fpr_store
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %copy:gpr(<2 x s32>) = COPY $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s32>) = COPY %copy(<2 x s32>)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[COPY]](<2 x s32>)
+    ; CHECK: G_STORE %intrin(s32), %ptr(p0) :: (store 4)
+    %copy:_(<2 x s32>) = COPY $x0
+    %ptr:_(p0) = COPY $x0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<2 x s32>)
+    G_STORE %intrin, %ptr :: (store 4)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir
new file mode 100644 (file)
index 0000000..9a81493
--- /dev/null
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            uaddlv_v8s8
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v8s8
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv8i8v:%[0-9]+]]:fpr16 = UADDLVv8i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v16s8
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v16s8
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv16i8v:%[0-9]+]]:fpr16 = UADDLVv16i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv16i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            uaddlv_v4s16
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v4s16
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv4i16v:%[0-9]+]]:fpr32 = UADDLVv4i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v8s16
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v8s16
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v4s32
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v4s32
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i32v]], %subreg.dsub
+    ; CHECK: %intrin:fpr64 = COPY [[INSERT_SUBREG]].dsub
+    ; CHECK: $x0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:fpr(<4 x s32>) = COPY $q0
+    %intrin:fpr(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s32>)
+    $x0 = COPY %intrin(s64)
+    RET_ReallyLR implicit $x0