[AArch64][GlobalISel] Regbankselect + select @llvm.aarch64.neon.uaddlv
authorJessica Paquette <jpaquette@apple.com>
Tue, 13 Apr 2021 17:03:17 +0000 (10:03 -0700)
committerJessica Paquette <jpaquette@apple.com>
Mon, 19 Apr 2021 17:47:49 +0000 (10:47 -0700)
It turns out we already import a bunch of selection code for intrinsics. The
imported code checks that the register banks on the G_INTRINSIC instruction
are correct and, if so, goes ahead and selects it.

This adds code to AArch64RegisterBankInfo to allow us to correctly determine
the register banks on intrinsics that have known register bank constraints.

For now, this only handles @llvm.aarch64.neon.uaddlv. This is necessary for
porting AArch64TargetLowering::LowerCTPOP.
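
For context, AArch64's CTPOP lowering counts bits per byte with CNT and then
sums the lanes with uaddlv. A ported GlobalISel version would build the
intrinsic through MachineIRBuilder; a minimal sketch (hypothetical: the names
MIB and CntReg are assumed, and the port itself is not part of this patch):

    // Hypothetical: sum the per-byte counts in CntReg (an <8 x s8> value,
    // e.g. produced by a vector G_CTPOP) into a single s32 result.
    auto Sum = MIB.buildIntrinsic(Intrinsic::aarch64_neon_uaddlv,
                                  {LLT::scalar(32)},
                                  /*HasSideEffects=*/false)
                   .addUse(CntReg);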

Also add a utility for getting the intrinsic ID from a G_INTRINSIC instruction.
This seems a little nicer than having to know how intrinsic instructions
are structured.
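
For example, a caller can now match a specific intrinsic without knowing which
operand holds the ID. A small sketch, mirroring the RegisterBankInfo change
below:

    // Recognize the uaddlv intrinsic on a generic instruction.
    if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
        getIntrinsicID(MI) == Intrinsic::aarch64_neon_uaddlv) {
      // ... this instruction only uses and defines FPRs ...
    }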

Differential Revision: https://reviews.llvm.org/D100398

llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir [new file with mode: 0644]
llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir [new file with mode: 0644]

index 19a5589..1a592b6 100644 (file)
@@ -380,5 +380,10 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
 /// Returns true if the given block should be optimized for size.
 bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
                       BlockFrequencyInfo *BFI);
+
+/// \returns the intrinsic ID for a G_INTRINSIC or G_INTRINSIC_W_SIDE_EFFECTS
+/// instruction \p MI.
+unsigned getIntrinsicID(const MachineInstr &MI);
+
 } // End namespace llvm.
 #endif
index 68f51c3..0b89ae4 100644 (file)
@@ -989,3 +989,12 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
   return F.hasOptSize() || F.hasMinSize() ||
          llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
 }
+
+unsigned llvm::getIntrinsicID(const MachineInstr &MI) {
+#ifndef NDEBUG
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_INTRINSIC ||
+         Opc == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+#endif
+  return MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
+}
index 7410c76..8b488ea 100644 (file)
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -25,6 +26,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
@@ -466,11 +468,24 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
                                getValueMapping(RBIdx, Size), NumOperands);
 }
 
+/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
+static bool isFPIntrinsic(unsigned ID) {
+  // TODO: Add more intrinsics.
+  switch (ID) {
+  default:
+    return false;
+  case Intrinsic::aarch64_neon_uaddlv:
+    return true;
+  }
+}
+
 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                                const MachineRegisterInfo &MRI,
                                                const TargetRegisterInfo &TRI,
                                                unsigned Depth) const {
   unsigned Op = MI.getOpcode();
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(getIntrinsicID(MI)))
+    return true;
 
   // Do we have an explicit floating point instruction?
   if (isPreISelGenericFloatingPointOpcode(Op))
@@ -915,6 +930,20 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // Assign them FPR for now.
     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
     break;
+  case TargetOpcode::G_INTRINSIC: {
+    // Check if we know that the intrinsic has any constraints on its register
+    // banks. If it does, then update the mapping accordingly.
+    unsigned ID = getIntrinsicID(MI);
+    unsigned Idx = 0;
+    if (!isFPIntrinsic(ID))
+      break;
+    for (const auto &Op : MI.explicit_operands()) {
+      if (Op.isReg())
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+      ++Idx;
+    }
+    break;
+  }
   }
 
   // Finally construct the computed mapping.
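
Per the TODO above, teaching this hook about another all-FPR intrinsic should
only require a new case in isFPIntrinsic, along these lines (hypothetical,
not part of this patch):

    /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
    static bool isFPIntrinsic(unsigned ID) {
      switch (ID) {
      default:
        return false;
      case Intrinsic::aarch64_neon_uaddlv:
      case Intrinsic::aarch64_neon_saddlv: // hypothetical future addition
        return true;
      }
    }
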
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
new file mode 100644 (file)
index 0000000..7e89c99
--- /dev/null
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify register banks for intrinsics with known constraints (e.g. all
+# operands must be FPRs).
+#
+
+...
+---
+name:            uaddlv_fpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_fpr
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr(<16 x s8>) = COPY $q0
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:_(<16 x s8>) = COPY $q0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_fpr_load
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: uaddlv_fpr_load
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %ptr:_(p0) = COPY $x0
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (load 8)
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_fpr_store
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: uaddlv_fpr_store
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %copy:gpr(<2 x s32>) = COPY $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s32>) = COPY %copy(<2 x s32>)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[COPY]](<2 x s32>)
+    ; CHECK: G_STORE %intrin(s32), %ptr(p0) :: (store 4)
+    %copy:_(<2 x s32>) = COPY $x0
+    %ptr:_(p0) = COPY $x0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<2 x s32>)
+    G_STORE %intrin, %ptr :: (store 4)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir
new file mode 100644 (file)
index 0000000..9a81493
--- /dev/null
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            uaddlv_v8s8
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v8s8
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv8i8v:%[0-9]+]]:fpr16 = UADDLVv8i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v16s8
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v16s8
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv16i8v:%[0-9]+]]:fpr16 = UADDLVv16i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv16i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            uaddlv_v4s16
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v4s16
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv4i16v:%[0-9]+]]:fpr32 = UADDLVv4i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v8s16
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v8s16
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v4s32
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v4s32
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i32v]], %subreg.dsub
+    ; CHECK: %intrin:fpr64 = COPY [[INSERT_SUBREG]].dsub
+    ; CHECK: $x0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:fpr(<4 x s32>) = COPY $q0
+    %intrin:fpr(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s32>)
+    $x0 = COPY %intrin(s64)
+    RET_ReallyLR implicit $x0