add_llvm_target(RISCVCodeGen
RISCVAsmPrinter.cpp
RISCVCallLowering.cpp
+ RISCVCodeGenPrepare.cpp
RISCVMakeCompressible.cpp
RISCVExpandAtomicPseudoInsts.cpp
RISCVExpandPseudoInsts.cpp
class MachineOperand;
class PassRegistry;
+FunctionPass *createRISCVCodeGenPreparePass();
+void initializeRISCVCodeGenPreparePass(PassRegistry &);
+
bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
--- /dev/null
+//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a RISCV specific version of CodeGenPrepare.
+// It munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-codegenprepare"
+#define PASS_NAME "RISCV CodeGenPrepare"
+
+// Counts zext -> sext rewrites performed by optimizeZExt. The description
+// must match the direction of the transform (zext is replaced by sext).
+STATISTIC(NumZExtToSExt, "Number of ZExt instructions converted to SExt");
+
+namespace {
+
+// IR-level FunctionPass run before instruction selection. Caches the
+// DataLayout and RISCV subtarget pointers, both initialized per function in
+// runOnFunction before any rewriting happens.
+class RISCVCodeGenPrepare : public FunctionPass {
+  const DataLayout *DL;
+  const RISCVSubtarget *ST;
+
+public:
+  static char ID;
+
+  RISCVCodeGenPrepare() : FunctionPass(ID) {}
+
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override { return PASS_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // Only individual instructions are replaced/erased; no blocks are
+    // added or removed, so the CFG is preserved.
+    AU.setPreservesCFG();
+    // Required to reach the RISCVTargetMachine (and through it the
+    // per-function RISCVSubtarget) in runOnFunction.
+    AU.addRequired<TargetPassConfig>();
+  }
+
+private:
+  bool optimizeZExt(ZExtInst *I);
+};
+
+} // end anonymous namespace
+
+// Try to replace (i64 (zext (i32 X))) with a sext when a dominating condition
+// proves the sign bit of X is zero. RV64-only. Returns true iff the IR was
+// changed.
+bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) {
+  if (!ST->is64Bit())
+    return false;
+
+  Value *Src = ZExt->getOperand(0);
+
+  // We only care about ZExt from i32 to i64.
+  if (!ZExt->getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
+    return false;
+
+  // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
+  // can determine that the sign bit of X is zero via a dominating condition.
+  // This often occurs with widened induction variables.
+  if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+                              Constant::getNullValue(Src->getType()), ZExt,
+                              *DL)) {
+    IRBuilder<> Builder(ZExt);
+    Value *SExt = Builder.CreateSExt(Src, ZExt->getType());
+    // Keep the original value's name so the rewritten IR stays readable.
+    SExt->takeName(ZExt);
+
+    // Redirect all users first, then erase the now-dead zext.
+    ZExt->replaceAllUsesWith(SExt);
+    ZExt->eraseFromParent();
+    ++NumZExtToSExt;
+    return true;
+  }
+
+  return false;
+}
+
+// Pass entry point: walk every instruction in F and apply the zext -> sext
+// rewrite to each candidate. Returns true iff any instruction was changed.
+bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  // Reach the per-function RISCVSubtarget via the TargetPassConfig analysis
+  // declared required in getAnalysisUsage.
+  auto &TPC = getAnalysis<TargetPassConfig>();
+  auto &TM = TPC.getTM<RISCVTargetMachine>();
+  ST = &TM.getSubtarget<RISCVSubtarget>(F);
+
+  DL = &F.getParent()->getDataLayout();
+
+  bool MadeChange = false;
+  for (auto &BB : F) {
+    // early_inc_range: optimizeZExt may erase the current instruction.
+    for (Instruction &I : llvm::make_early_inc_range(BB)) {
+      if (auto *ZExt = dyn_cast<ZExtInst>(&I))
+        MadeChange |= optimizeZExt(ZExt);
+    }
+  }
+
+  return MadeChange;
+}
+
+// Pass registration. DEBUG_TYPE doubles as the command-line pass name
+// (-riscv-codegenprepare) used when running the pass through opt.
+INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+
+char RISCVCodeGenPrepare::ID = 0;
+
+// Factory called from RISCVPassConfig::addCodeGenPrepare.
+FunctionPass *llvm::createRISCVCodeGenPreparePass() {
+  return new RISCVCodeGenPrepare();
+}
initializeGlobalISel(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
+ initializeRISCVCodeGenPreparePass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVSExtWRemovalPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
}
void addIRPasses() override;
+ void addCodeGenPrepare() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
TargetPassConfig::addIRPasses();
}
+// Insert the RISCV-specific pass ahead of the generic CodeGen Prepare pass,
+// but only when optimizing (it is skipped at -O0).
+void RISCVPassConfig::addCodeGenPrepare() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createRISCVCodeGenPreparePass());
+  TargetPassConfig::addCodeGenPrepare();
+}
+
bool RISCVPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None) {
// Add a barrier before instruction selection so that we will not get
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: TLS Variable Hoist
+; CHECK-NEXT: RISCV CodeGenPrepare
; CHECK-NEXT: CodeGen Prepare
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Exception handling preparation
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+
+; Make sure we don't emit a pair of shift for the zext in the preheader. We
+; can tell that bit 31 is 0 in the preheader and rely on %n already being
+; sign extended without adding zeros explicitly.
+define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: blez a1, .LBB0_2
+; CHECK-NEXT: .LBB0_1: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: lw a2, 0(a0)
+; CHECK-NEXT: addiw a2, a2, 4
+; CHECK-NEXT: sw a2, 0(a0)
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: addi a0, a0, 4
+; CHECK-NEXT: bnez a1, .LBB0_1
+; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+  ; %n > 0 is established by %cmp3 on this path, so zext and sext of %n
+  ; agree; no slli/srli pair should appear in the CHECK lines above.
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+  ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -riscv-codegenprepare -mtriple=riscv64 | FileCheck %s
+
+; Test that we can convert the %wide.trip.count zext to a sext. The dominating
+; condition %cmp3 ruled out %n being negative.
+define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[LSR_IV5:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ], [ [[UGLYGEP:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 4
+; CHECK-NEXT: store i32 [[ADD]], ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV5]], -1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+  ; Dominated by %cmp3 (%n > 0), so the pass is expected to rewrite this
+  ; zext to a sext, as pinned by the WIDE_TRIP_COUNT CHECK line above.
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+  %lsr.iv5 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+  %lsr.iv = phi ptr [ %a, %for.body.preheader ], [ %uglygep, %for.body ]
+  %0 = load i32, ptr %lsr.iv, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, ptr %lsr.iv, align 4
+  %uglygep = getelementptr i8, ptr %lsr.iv, i64 4
+  %lsr.iv.next = add nsw i64 %lsr.iv5, -1
+  %exitcond.not = icmp eq i64 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}