This mirrors what has been done for AArch64 in D125335.
We enable this under `-O2` to show the codegen diffs here, but we
may later restrict it to `-O3` only, as AArch64 does.
There are two cases in which we may produce these eliminable copies:
1. ISel of `FrameIndex`, e.g. `rvv/fixed-vectors-calling-conv.ll`.
2. Tail duplication, e.g. `select-optimize-multiple.ll`.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D144535
"autovectorization with fixed width vectors."),
cl::init(-1), cl::Hidden);
+static cl::opt<bool> EnableRISCVCopyPropagation(
+ "riscv-enable-copy-propagation",
+ cl::desc("Enable the copy propagation with RISCV copy instr"),
+ cl::init(true), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
void RISCVPassConfig::addPreEmitPass2() {
addPass(createRISCVExpandPseudoPass());
+
+ // Do the copy propagation after expanding pseudos because we may produce some
+ // MVs when expanding.
+ if (TM->getOptLevel() >= CodeGenOpt::Default && EnableRISCVCopyPropagation)
+ addPass(createMachineCopyPropagationPass(true));
+
// Schedule the expansion of AMOs at the last possible moment, avoiding the
// possibility for other passes to break the requirements for forward
// progress in the LR/SC block.
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: RISCV pseudo instruction expansion pass
+; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O3 -mtriple=riscv64 -riscv-enable-copy-propagation=false | FileCheck %s --check-prefix=NOPROP
+; RUN: llc < %s -O3 -mtriple=riscv64 -riscv-enable-copy-propagation=true | FileCheck %s --check-prefix=PROP
+
+define void @copyprop_after_mbp(i32 %v, i32* %a, i32* %b, i32* %c, i32* %d) {
+; NOPROP-LABEL: copyprop_after_mbp:
+; NOPROP: # %bb.0:
+; NOPROP-NEXT: sext.w a0, a0
+; NOPROP-NEXT: li a5, 10
+; NOPROP-NEXT: bne a0, a5, .LBB0_2
+; NOPROP-NEXT: # %bb.1: # %bb.0
+; NOPROP-NEXT: li a0, 15
+; NOPROP-NEXT: sw a0, 0(a2)
+; NOPROP-NEXT: li a0, 1
+; NOPROP-NEXT: sw a0, 0(a1)
+; NOPROP-NEXT: li a0, 12
+; NOPROP-NEXT: sw a0, 0(a4)
+; NOPROP-NEXT: ret
+; NOPROP-NEXT: .LBB0_2: # %bb.1
+; NOPROP-NEXT: li a0, 0
+; NOPROP-NEXT: li a2, 25
+; NOPROP-NEXT: sw a2, 0(a3)
+; NOPROP-NEXT: sw a0, 0(a1)
+; NOPROP-NEXT: li a0, 12
+; NOPROP-NEXT: sw a0, 0(a4)
+; NOPROP-NEXT: ret
+;
+; PROP-LABEL: copyprop_after_mbp:
+; PROP: # %bb.0:
+; PROP-NEXT: sext.w a0, a0
+; PROP-NEXT: li a5, 10
+; PROP-NEXT: bne a0, a5, .LBB0_2
+; PROP-NEXT: # %bb.1: # %bb.0
+; PROP-NEXT: li a0, 15
+; PROP-NEXT: sw a0, 0(a2)
+; PROP-NEXT: li a0, 1
+; PROP-NEXT: sw a0, 0(a1)
+; PROP-NEXT: li a0, 12
+; PROP-NEXT: sw a0, 0(a4)
+; PROP-NEXT: ret
+; PROP-NEXT: .LBB0_2: # %bb.1
+; PROP-NEXT: li a2, 25
+; PROP-NEXT: sw a2, 0(a3)
+; PROP-NEXT: sw zero, 0(a1)
+; PROP-NEXT: li a0, 12
+; PROP-NEXT: sw a0, 0(a4)
+; PROP-NEXT: ret
+ %1 = icmp eq i32 %v, 10
+ br i1 %1, label %bb.0, label %bb.1
+
+bb.0:
+ store i32 15, i32* %b, align 4
+ br label %bb.2
+
+bb.1:
+ store i32 25, i32* %c, align 4
+ br label %bb.2
+
+bb.2:
+ %2 = phi i32 [ 1, %bb.0 ], [ 0, %bb.1 ]
+ store i32 %2, i32* %a, align 4
+ store i32 12, i32* %d, align 4
+ ret void
+}
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB0_3: # %entry
; RV64IFD-NEXT: mv a0, a2
-; RV64IFD-NEXT: blt a1, a0, .LBB0_2
+; RV64IFD-NEXT: blt a1, a2, .LBB0_2
; RV64IFD-NEXT: .LBB0_4: # %entry
; RV64IFD-NEXT: lui a0, 524288
; RV64IFD-NEXT: ret
; RV64-NEXT: ret
; RV64-NEXT: .LBB3_3: # %entry
; RV64-NEXT: mv a0, a2
-; RV64-NEXT: blt a1, a0, .LBB3_2
+; RV64-NEXT: blt a1, a2, .LBB3_2
; RV64-NEXT: .LBB3_4: # %entry
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: ret
; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB27_3: # %entry
; RV64IFD-NEXT: mv a0, a2
-; RV64IFD-NEXT: blt a1, a0, .LBB27_2
+; RV64IFD-NEXT: blt a1, a2, .LBB27_2
; RV64IFD-NEXT: .LBB27_4: # %entry
; RV64IFD-NEXT: lui a0, 524288
; RV64IFD-NEXT: ret
; RV64-NEXT: ret
; RV64-NEXT: .LBB30_3: # %entry
; RV64-NEXT: mv a0, a2
-; RV64-NEXT: blt a1, a0, .LBB30_2
+; RV64-NEXT: blt a1, a2, .LBB30_2
; RV64-NEXT: .LBB30_4: # %entry
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: ret
; CHECK-NOV-NEXT: blt a0, a3, .LBB0_2
; CHECK-NOV-NEXT: .LBB0_6: # %entry
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: blt a2, a0, .LBB0_3
+; CHECK-NOV-NEXT: blt a2, a3, .LBB0_3
; CHECK-NOV-NEXT: .LBB0_7: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: blt a2, a1, .LBB0_4
; CHECK-NOV-NEXT: blt a5, a6, .LBB3_4
; CHECK-NOV-NEXT: .LBB3_13: # %entry
; CHECK-NOV-NEXT: mv a5, a6
-; CHECK-NOV-NEXT: blt a3, a5, .LBB3_5
+; CHECK-NOV-NEXT: blt a3, a6, .LBB3_5
; CHECK-NOV-NEXT: .LBB3_14: # %entry
; CHECK-NOV-NEXT: lui a5, 524288
; CHECK-NOV-NEXT: blt a3, a4, .LBB3_6
; CHECK-NOV-NEXT: blt a3, a4, .LBB6_4
; CHECK-NOV-NEXT: .LBB6_13: # %entry
; CHECK-NOV-NEXT: mv a3, a4
-; CHECK-NOV-NEXT: blt a1, a3, .LBB6_5
+; CHECK-NOV-NEXT: blt a1, a4, .LBB6_5
; CHECK-NOV-NEXT: .LBB6_14: # %entry
; CHECK-NOV-NEXT: lui a3, 524288
; CHECK-NOV-NEXT: blt a1, a2, .LBB6_6
; CHECK-NOV-NEXT: blt a0, a3, .LBB27_2
; CHECK-NOV-NEXT: .LBB27_6: # %entry
; CHECK-NOV-NEXT: mv a0, a3
-; CHECK-NOV-NEXT: blt a2, a0, .LBB27_3
+; CHECK-NOV-NEXT: blt a2, a3, .LBB27_3
; CHECK-NOV-NEXT: .LBB27_7: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: blt a2, a1, .LBB27_4
; CHECK-NOV-NEXT: blt a5, a6, .LBB30_4
; CHECK-NOV-NEXT: .LBB30_13: # %entry
; CHECK-NOV-NEXT: mv a5, a6
-; CHECK-NOV-NEXT: blt a3, a5, .LBB30_5
+; CHECK-NOV-NEXT: blt a3, a6, .LBB30_5
; CHECK-NOV-NEXT: .LBB30_14: # %entry
; CHECK-NOV-NEXT: lui a5, 524288
; CHECK-NOV-NEXT: blt a3, a4, .LBB30_6
; CHECK-NOV-NEXT: blt a3, a4, .LBB33_4
; CHECK-NOV-NEXT: .LBB33_13: # %entry
; CHECK-NOV-NEXT: mv a3, a4
-; CHECK-NOV-NEXT: blt a1, a3, .LBB33_5
+; CHECK-NOV-NEXT: blt a1, a4, .LBB33_5
; CHECK-NOV-NEXT: .LBB33_14: # %entry
; CHECK-NOV-NEXT: lui a3, 524288
; CHECK-NOV-NEXT: blt a1, a2, .LBB33_6
; LMULMAX8-LABEL: vector_arg_via_stack:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: li a0, 32
-; LMULMAX8-NEXT: mv a1, sp
; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; LMULMAX8-NEXT: vle32.v v16, (a1)
+; LMULMAX8-NEXT: vle32.v v16, (sp)
; LMULMAX8-NEXT: vadd.vv v8, v8, v16
; LMULMAX8-NEXT: ret
;
; LMULMAX4-LABEL: vector_arg_via_stack:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; LMULMAX4-NEXT: mv a0, sp
-; LMULMAX4-NEXT: vle32.v v16, (a0)
+; LMULMAX4-NEXT: vle32.v v16, (sp)
; LMULMAX4-NEXT: addi a0, sp, 64
; LMULMAX4-NEXT: vle32.v v20, (a0)
; LMULMAX4-NEXT: vadd.vv v8, v8, v16
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: addi a0, sp, 64
; LMULMAX2-NEXT: vle32.v v16, (a0)
-; LMULMAX2-NEXT: mv a0, sp
-; LMULMAX2-NEXT: vle32.v v18, (a0)
+; LMULMAX2-NEXT: vle32.v v18, (sp)
; LMULMAX2-NEXT: addi a0, sp, 32
; LMULMAX2-NEXT: vle32.v v20, (a0)
; LMULMAX2-NEXT: addi a0, sp, 96
; LMULMAX1-NEXT: vle32.v v19, (a0)
; LMULMAX1-NEXT: addi a0, sp, 32
; LMULMAX1-NEXT: vle32.v v20, (a0)
-; LMULMAX1-NEXT: mv a0, sp
-; LMULMAX1-NEXT: vle32.v v21, (a0)
+; LMULMAX1-NEXT: vle32.v v21, (sp)
; LMULMAX1-NEXT: addi a0, sp, 16
; LMULMAX1-NEXT: vle32.v v22, (a0)
; LMULMAX1-NEXT: addi a0, sp, 48
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt a1, a0, .LBB0_2
+; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt a1, a0, .LBB0_2
+; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB6_3: # %entry
; RV32I-NEXT: mv a1, a2
-; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: beq a0, a4, .LBB6_2
; RV32I-NEXT: .LBB6_4: # %entry
-; RV32I-NEXT: mv a2, a3
-; RV32I-NEXT: add a0, a1, a2
+; RV32I-NEXT: add a0, a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: cmovccdep:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB6_3: # %entry
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: mv a2, a1
; RV64I-NEXT: beq a0, a4, .LBB6_2
; RV64I-NEXT: .LBB6_4: # %entry
-; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: addw a0, a1, a2
+; RV64I-NEXT: addw a0, a1, a3
; RV64I-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 123
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: bnez a1, .LBB7_2
; RV32I-NEXT: .LBB7_4: # %entry
-; RV32I-NEXT: mv a4, a5
-; RV32I-NEXT: add a0, a2, a4
+; RV32I-NEXT: add a0, a2, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: cmovdiffcc:
; RV64I-NEXT: mv a2, a3
; RV64I-NEXT: bnez a1, .LBB7_2
; RV64I-NEXT: .LBB7_4: # %entry
-; RV64I-NEXT: mv a4, a5
-; RV64I-NEXT: addw a0, a2, a4
+; RV64I-NEXT: addw a0, a2, a5
; RV64I-NEXT: ret
entry:
%cond1 = select i1 %a, i32 %c, i32 %d
; RV32I-NEXT: addi a4, a4, -1920
; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: add a0, a0, a4
-; RV32I-NEXT: bge a3, a2, .LBB1_2
+; RV32I-NEXT: blez a2, .LBB1_2
; RV32I-NEXT: .LBB1_1: # %while_body
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: addi a4, a3, 1
; RV32I-NEXT: sw a4, 0(a1)
; RV32I-NEXT: sw a3, 4(a1)
; RV32I-NEXT: mv a3, a4
-; RV32I-NEXT: blt a3, a2, .LBB1_1
+; RV32I-NEXT: blt a4, a2, .LBB1_1
; RV32I-NEXT: .LBB1_2: # %while_end
; RV32I-NEXT: ret
;
; RV64I-NEXT: add a1, a1, a4
; RV64I-NEXT: add a0, a0, a4
; RV64I-NEXT: sext.w a2, a2
-; RV64I-NEXT: bge a3, a2, .LBB1_2
+; RV64I-NEXT: blez a2, .LBB1_2
; RV64I-NEXT: .LBB1_1: # %while_body
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: addiw a4, a3, 1
; RV64I-NEXT: sw a4, 0(a1)
; RV64I-NEXT: sw a3, 4(a1)
; RV64I-NEXT: mv a3, a4
-; RV64I-NEXT: blt a3, a2, .LBB1_1
+; RV64I-NEXT: blt a4, a2, .LBB1_1
; RV64I-NEXT: .LBB1_2: # %while_end
; RV64I-NEXT: ret
entry:
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt a1, a0, .LBB0_2
+; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_3:
; RV64I-NEXT: mv a0, a2
-; RV64I-NEXT: blt a1, a0, .LBB0_2
+; RV64I-NEXT: blt a1, a2, .LBB0_2
; RV64I-NEXT: .LBB0_4:
; RV64I-NEXT: lui a0, 524288
; RV64I-NEXT: ret
; CHECK-NEXT: vsetvli a4, a2, e8, m8, ta, ma
; CHECK-NEXT: bne a4, a2, .LBB0_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a3)
+; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.then
; CHECK-NEXT: add a2, a0, a2