From: Jessica Paquette
Date: Wed, 4 Sep 2019 22:54:52 +0000 (+0000)
Subject: [AArch64][GlobalISel] Teach AArch64CallLowering to handle basic sibling calls
X-Git-Tag: llvmorg-11-init~10008
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b78324fc4087f0d9faf66ece89438a84041d5fa8;p=platform%2Fupstream%2Fllvm.git

[AArch64][GlobalISel] Teach AArch64CallLowering to handle basic sibling calls

This adds support for basic sibling call lowering in AArch64. The intent
here is to only handle tail calls which do not change the ABI (hence,
sibling calls).

At this point, it is very restricted. It does not handle

- Vararg calls.
- Calls with outgoing arguments.
- Calls whose calling conventions differ from the caller's calling convention.
- Tail/sibling calls with BTI enabled.

This patch adds

- `AArch64CallLowering::isEligibleForTailCallOptimization`, which is
  equivalent to the same function in AArch64ISelLowering.cpp (albeit with
  the restrictions above).
- `mayTailCallThisCC` and `canGuaranteeTCO`, which are identical to those in
  AArch64ISelLowering.cpp.
- `getCallOpcode`, which is exactly what it sounds like.

Tail/sibling calls are lowered by checking if they pass target-independent
tail call positioning checks, and checking if they satisfy
`isEligibleForTailCallOptimization`. If they do, then a tail call
instruction is emitted instead of a normal call.

If we have a sibling call (which is always the case in this patch), then we
do not emit any stack adjustment operations. When we go to lower a return,
we check if we've already emitted a tail call. If so, then we skip the
return lowering.

For testing, this patch

- Adds call-translator-tail-call.ll to test which tail calls we currently
  lower, which ones we don't, and which ones we shouldn't.
- Updates branch-target-enforcement-indirect-calls.ll to show that we fall
  back as expected.

Differential Revision: https://reviews.llvm.org/D67189

llvm-svn: 370996
---

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index e996cf1..cfdf3f5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -90,6 +90,13 @@ public:
 
     /// True if the call must be tail call optimized.
     bool IsMustTailCall = false;
+
+    /// True if the call passes all target-independent checks for tail call
+    /// optimization.
+    bool IsTailCall = false;
+
+    /// True if the call is to a vararg function.
+    bool IsVarArg = false;
   };
 
   /// Argument handling is mostly uniform between the four places that
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d433155..1c8e454 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -64,7 +64,9 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
   Info.CallConv = CS.getCallingConv();
   Info.SwiftErrorVReg = SwiftErrorVReg;
   Info.IsMustTailCall = CS.isMustTailCall();
-
+  Info.IsTailCall = CS.isTailCall() &&
+                    isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+  Info.IsVarArg = CS.getFunctionType()->isVarArg();
   return lowerCall(MIRBuilder, Info);
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 9ace33b..a8a1389 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -233,6 +233,17 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
                                       ArrayRef<Register> VRegs,
                                       Register SwiftErrorVReg) const {
+
+  // Check if a tail call was lowered in this block. If so, we already handled
+  // the terminator.
+  MachineFunction &MF = MIRBuilder.getMF();
+  if (MF.getFrameInfo().hasTailCall()) {
+    MachineBasicBlock &MBB = MIRBuilder.getMBB();
+    auto FirstTerm = MBB.getFirstTerminator();
+    if (FirstTerm != MBB.end() && FirstTerm->isCall())
+      return true;
+  }
+
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
@@ -403,6 +414,129 @@ bool AArch64CallLowering::lowerFormalArguments(
   return true;
 }
 
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+  return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::C:
+  case CallingConv::PreserveMost:
+  case CallingConv::Swift:
+    return true;
+  default:
+    return canGuaranteeTCO(CC);
+  }
+}
+
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
+  CallingConv::ID CalleeCC = Info.CallConv;
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &CallerF = MF.getFunction();
+  CallingConv::ID CallerCC = CallerF.getCallingConv();
+  bool CCMatch = CallerCC == CalleeCC;
+
+  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+  if (!mayTailCallThisCC(CalleeCC)) {
+    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+    return false;
+  }
+
+  if (Info.IsVarArg) {
+    LLVM_DEBUG(dbgs() << "... Tail calling varargs not supported yet.\n");
+    return false;
+  }
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call. Working around this *is* possible (see
+  // X86).
+  //
+  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+  // it?
+  //
+  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+  // In this case, it is necessary to save/restore X0 in the callee. Tail
+  // call opt interferes with this. So we disable tail call opt when the
+  // caller has an argument with "inreg" attribute.
+  //
+  // FIXME: Check whether the callee also has an "inreg" argument.
+  if (any_of(CallerF.args(), [](const Argument &A) {
+        return A.hasByValAttr() || A.hasInRegAttr();
+      })) {
+    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval or "
+                         "inreg arguments.\n");
+    return false;
+  }
+
+  // Externally-defined functions with weak linkage should not be
+  // tail-called on AArch64 when the OS does not support dynamic
+  // pre-emption of symbols, as the AAELF spec requires normal calls
+  // to undefined weak functions to be replaced with a NOP or jump to the
+  // next instruction. The behaviour of branch instructions in this
+  // situation (as used for tail calls) is implementation-defined, so we
+  // cannot rely on the linker replacing the tail call with a return.
+  if (Info.Callee.isGlobal()) {
+    const GlobalValue *GV = Info.Callee.getGlobal();
+    const Triple &TT = MF.getTarget().getTargetTriple();
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+         TT.isOSBinFormatMachO())) {
+      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+                           "with weak linkage for this OS.\n");
+      return false;
+    }
+  }
+
+  // If we have -tailcallopt and matching CCs, at this point, we could return
+  // true. However, we don't have full tail call support yet. So, continue
+  // checking. We want to emit a sibling call.
+
+  // I want anyone implementing a new calling convention to think long and hard
+  // about this assert.
+  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+         "Unexpected variadic calling convention");
+
+  // For now, only support the case where the calling conventions match.
+  if (!CCMatch) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call with mismatched calling conventions yet.\n");
+    return false;
+  }
+
+  // For now, only handle callees that take no arguments.
+  if (!Info.OrigArgs.empty()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call callees with outgoing arguments yet.\n");
+    return false;
+  }
+
+  LLVM_DEBUG(
+      dbgs() << "... Call is eligible for tail call optimization.\n");
+  return true;
+}
+
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+                              bool IsTailCall) {
+  if (!IsTailCall)
+    return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+  if (!IsIndirect)
+    return AArch64::TCRETURNdi;
+
+  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+  // x16 or x17.
+  if (CallerF.hasFnAttribute("branch-target-enforcement"))
+    return AArch64::TCRETURNriBTI;
+
+  return AArch64::TCRETURNri;
+}
+
 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                     CallLoweringInfo &Info) const {
   MachineFunction &MF = MIRBuilder.getMF();
@@ -411,6 +545,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   auto &DL = F.getParent()->getDataLayout();
 
   if (Info.IsMustTailCall) {
+    // TODO: Until we lower all tail calls, we should fall back on this.
     LLVM_DEBUG(dbgs() << "Cannot lower musttail calls yet.\n");
     return false;
   }
@@ -423,6 +558,11 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     SplitArgs.back().Flags[0].setZExt();
   }
 
+  bool IsSibCall =
+      Info.IsTailCall && isEligibleForTailCallOptimization(MIRBuilder, Info);
+  if (IsSibCall)
+    MF.getFrameInfo().setHasTailCall();
+
   // Find out which ABI gets to decide where things go.
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   CCAssignFn *AssignFnFixed =
@@ -430,14 +570,33 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   CCAssignFn *AssignFnVarArg =
       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
 
-  auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+  // If we have a sibling call, then we don't have to adjust the stack.
+  // Otherwise, we need to adjust it.
+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
-  auto MIB = MIRBuilder.buildInstrNoInsert(Info.Callee.isReg() ? AArch64::BLR
-                                                               : AArch64::BL);
+  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), IsSibCall);
+
+  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+  // register class. Until we can do that, we should fall back here.
+  if (Opc == AArch64::TCRETURNriBTI) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+    return false;
+  }
+
+  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
+  // Add the byte offset for the tail call. We only have sibling calls, so this
+  // is always 0.
+  // TODO: Handle tail calls where we will have a different value here.
+  if (IsSibCall)
+    MIB.addImm(0);
+
   // Tell the call which registers are clobbered.
   auto TRI = MF.getSubtarget().getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
@@ -486,10 +645,13 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }
 
-  CallSeqStart.addImm(Handler.StackSize).addImm(0);
-  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
-      .addImm(Handler.StackSize)
-      .addImm(0);
+  if (!IsSibCall) {
+    // If we aren't sibcalling, we need to move the stack.
+    CallSeqStart.addImm(Handler.StackSize).addImm(0);
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+        .addImm(Handler.StackSize)
+        .addImm(0);
+  }
 
   return true;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.h b/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 5da72286..0bf250b 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -43,6 +43,10 @@ public:
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
 
+  /// Returns true if the call can be lowered as a tail call.
+  bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
+                                         CallLoweringInfo &Info) const;
+
   bool supportSwiftError() const override { return true; }
 
 private:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
new file mode 100644
index 0000000..688c097
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=DARWIN,COMMON
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=WINDOWS,COMMON
+
+declare void @simple_fn()
+define void @tail_call() {
+  ; COMMON-LABEL: name: tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
+  tail call void @simple_fn()
+  ret void
+}
+
+; We should get a TCRETURNri here.
+; FIXME: We don't need the COPY.
+define void @indirect_tail_call(void()* %func) {
+  ; COMMON-LABEL: name: indirect_tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   liveins: $x0
+  ; COMMON:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; COMMON:   TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
+  tail call void %func()
+  ret void
+}
+
+declare void @outgoing_args_fn(i32)
+; Right now, callees with outgoing arguments should not be tail called.
+; TODO: Support this.
+define void @test_outgoing_args(i32 %a) {
+  ; COMMON-LABEL: name: test_outgoing_args
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   liveins: $w0
+  ; COMMON:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   $w0 = COPY [[COPY]](s32)
+  ; COMMON:   BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void @outgoing_args_fn(i32 %a)
+  ret void
+}
+
+; Right now, this should not be tail called.
+; TODO: Support this.
+declare void @varargs(i32, double, i64, ...)
+define void @test_varargs() {
+  ; COMMON-LABEL: name: test_varargs
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; COMMON:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; COMMON:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   $w0 = COPY [[C]](s32)
+  ; COMMON:   $d0 = COPY [[C1]](s64)
+  ; COMMON:   $x1 = COPY [[C2]](s64)
+  ; COMMON:   BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+  ret void
+}
+
+; Unsupported calling convention for tail calls. Make sure we never tail call
+; it.
+declare ghccc void @bad_call_conv_fn()
+define void @test_bad_call_conv() {
+  ; COMMON-LABEL: name: test_bad_call_conv
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @bad_call_conv_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call ghccc void @bad_call_conv_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has byval arguments.
+define void @test_byval(i8* byval %ptr) {
+  ; COMMON-LABEL: name: test_byval
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; COMMON:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has inreg arguments.
+define void @test_inreg(i8* inreg %ptr) {
+  ; COMMON-LABEL: name: test_inreg
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   liveins: $x0
+  ; COMMON:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the OS doesn't support it. Windows supports this,
+; so we should be able to tail call there.
+declare extern_weak void @extern_weak_fn()
+define void @test_extern_weak() {
+  ; DARWIN-LABEL: name: test_extern_weak
+  ; DARWIN: bb.1 (%ir-block.0):
+  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN:   BL @extern_weak_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN:   RET_ReallyLR
+  ; WINDOWS-LABEL: name: test_extern_weak
+  ; WINDOWS: bb.1 (%ir-block.0):
+  ; WINDOWS:   TCRETURNdi @extern_weak_fn, 0, csr_aarch64_aapcs, implicit $sp
+  tail call void @extern_weak_fn()
+  ret void
+}
+
+; Right now, mismatched calling conventions should not be tail called.
+; TODO: Support this.
+declare fastcc void @fast_fn()
+define void @test_mismatched_caller() {
+  ; COMMON-LABEL: name: test_mismatched_caller
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @fast_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call fastcc void @fast_fn()
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll b/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
index d7e3748..3fb9e32 100644
--- a/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
+++ b/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -mtriple aarch64--none-eabi -mattr=+bti < %s | FileCheck %s
+; RUN: llc -mtriple aarch64--none-eabi -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mattr=+bti %s -verify-machineinstrs -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK
+
+; FALLBACK: remark: <unknown>:0:0: unable to translate instruction: call: ' tail call void %p()' (in function: bti_enabled)
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-arm-none-eabi"
diff --git a/llvm/test/CodeGen/AArch64/dllimport.ll b/llvm/test/CodeGen/AArch64/dllimport.ll
index 281c847..cd440c3 100644
--- a/llvm/test/CodeGen/AArch64/dllimport.ll
+++ b/llvm/test/CodeGen/AArch64/dllimport.ll
@@ -59,4 +59,4 @@ define i32 @call_internal() {
 ; CHECK-LABEL: call_internal
 ; DAG-ISEL: b internal
 ; FAST-ISEL: b internal
-; GLOBAL-ISEL: bl internal
+; GLOBAL-ISEL: b internal
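
As a quick sanity check of the new lowering, a trivial sibling call can be
pushed through the IRTranslator with the same llc flags used by the RUN lines
in call-translator-tail-call.ll (the sibling.ll file name and the @callee /
@caller symbols below are only illustrative):

    ; sibling.ll
    declare void @callee()

    define void @caller() {
      tail call void @callee()
      ret void
    }

    $ llc sibling.ll -mtriple aarch64-apple-darwin -global-isel \
          -stop-after=irtranslator -verify-machineinstrs -o -

With this patch, the MIR printed for @caller should end in a TCRETURNdi of
@callee (as in the tail_call test above) rather than a BL followed by
RET_ReallyLR, and no ADJCALLSTACKDOWN/ADJCALLSTACKUP pair should be emitted,
since a sibling call needs no stack adjustment.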