From 002005e6740e6de8f748ed234db08625cc533195 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang
Date: Thu, 22 Dec 2022 07:07:37 +0000
Subject: [PATCH] [RISCV] Add integer scalar instructions to
 isAssociativeAndCommutative

Inspired by D138107.

We can add ADD[W], AND, OR, XOR, MUL[W], and MIN[U]/MAX[U] to
isAssociativeAndCommutative so that the existing MachineCombiner pass
can reassociate these operations to increase instruction-level
parallelism.
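To illustrate the effect, take the test_reassoc_add_i64 case from the
machine-combiner.ll test updated below. A serial dependence chain such
as

    add a0, a0, a1
    add a0, a0, a2
    add a0, a0, a3

is reassociated into a two-level tree whose first two adds are
independent and can issue in parallel:

    add a0, a0, a1
    add a2, a2, a3
    add a0, a0, a2

(The "before" sequence is illustrative; exact register assignments may
differ.)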
Differential Revision: https://reviews.llvm.org/D140530
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 46 +-
 llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll | 4 +-
 llvm/test/CodeGen/RISCV/addcarry.ll | 10 +-
 llvm/test/CodeGen/RISCV/addimm-mulimm.ll | 40 +-
 llvm/test/CodeGen/RISCV/alu64.ll | 4 +-
 llvm/test/CodeGen/RISCV/bswap-bitreverse.ll | 36 +-
 .../RISCV/calling-conv-ilp32-ilp32f-common.ll | 32 +-
 .../calling-conv-ilp32-ilp32f-ilp32d-common.ll | 68 +-
 .../RISCV/calling-conv-lp64-lp64f-lp64d-common.ll | 38 +-
 llvm/test/CodeGen/RISCV/compress.ll | 2 +-
 llvm/test/CodeGen/RISCV/copysign-casts.ll | 24 +-
 llvm/test/CodeGen/RISCV/div-by-constant.ll | 6 +-
 llvm/test/CodeGen/RISCV/div-pow2.ll | 38 +-
 llvm/test/CodeGen/RISCV/div.ll | 6 +-
 llvm/test/CodeGen/RISCV/fpclamptosat.ll | 22 +-
 llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll | 24 +-
 llvm/test/CodeGen/RISCV/iabs.ll | 16 +-
 llvm/test/CodeGen/RISCV/machine-combiner.ll | 613 ++++++++++++-
 llvm/test/CodeGen/RISCV/mul.ll | 204 ++---
 llvm/test/CodeGen/RISCV/neg-abs.ll | 8 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll | 6 +-
 llvm/test/CodeGen/RISCV/rv64zbb.ll | 10 +-
 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 40 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll | 12 +-
 llvm/test/CodeGen/RISCV/sadd_sat.ll | 8 +-
 llvm/test/CodeGen/RISCV/sadd_sat_plus.ll | 8 +-
 llvm/test/CodeGen/RISCV/select-binop-identity.ll | 4 +-
 llvm/test/CodeGen/RISCV/shadowcallstack.ll | 4 +-
 llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll | 108 +--
 llvm/test/CodeGen/RISCV/srem-lkk.ll | 4 +-
 .../test/CodeGen/RISCV/srem-seteq-illegal-types.ll | 26 +-
 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 74 +-
 llvm/test/CodeGen/RISCV/ssub_sat.ll | 4 +-
 llvm/test/CodeGen/RISCV/ssub_sat_plus.ll | 4 +-
 llvm/test/CodeGen/RISCV/uadd_sat.ll | 4 +-
 llvm/test/CodeGen/RISCV/uadd_sat_plus.ll | 8 +-
 .../RISCV/umulo-128-legalisation-lowering.ll | 74 +-
 llvm/test/CodeGen/RISCV/unaligned-load-store.ll | 34 +-
 llvm/test/CodeGen/RISCV/urem-lkk.ll | 4 +-
 .../test/CodeGen/RISCV/urem-seteq-illegal-types.ll | 28 +-
 llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 78 +-
 llvm/test/CodeGen/RISCV/usub_sat.ll | 4 +-
 llvm/test/CodeGen/RISCV/usub_sat_plus.ll | 8 +-
 llvm/test/CodeGen/RISCV/vararg.ll | 56 +-
 ...e-scalar-shift-by-byte-multiple-legalization.ll | 990 ++++++++++-----------
 .../RISCV/wide-scalar-shift-legalization.ll | 896 +++++++++----------
 llvm/test/CodeGen/RISCV/xaluo.ll | 384 ++++----
 47 files changed, 2388 insertions(+), 1733 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 742fee0..d73bb5b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1345,7 +1345,13 @@ bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
   const MachineInstr &Sibling =
       *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
-  return RISCV::hasEqualFRM(Inst, Sibling);
+  int16_t InstFrmOpIdx =
+      RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
+  int16_t SiblingFrmOpIdx =
+      RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
+
+  return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
+         RISCV::hasEqualFRM(Inst, Sibling);
 }
 
 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
@@ -1361,6 +1367,36 @@ bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
   if (isFADD(Opc) || isFMUL(Opc))
     return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
            Inst.getFlag(MachineInstr::MIFlag::FmNsz);
+
+  switch (Opc) {
+  default:
+    return false;
+  case RISCV::ADD:
+  case RISCV::ADDW:
+  case RISCV::AND:
+  case RISCV::OR:
+  case RISCV::XOR:
+  // From RISC-V ISA spec, if both the high and low bits of the same product
+  // are required, then the recommended code sequence is:
+  //
+  // MULH[[S]U] rdh, rs1, rs2
+  // MUL rdl, rs1, rs2
+  // (source register specifiers must be in same order and rdh cannot be the
+  // same as rs1 or rs2)
+  //
+  // Microarchitectures can then fuse these into a single multiply operation
+  // instead of performing two separate multiplies.
+  // MachineCombiner may reassociate MUL operands and lose the fusion
+  // opportunity.
+  case RISCV::MUL:
+  case RISCV::MULW:
+  case RISCV::MIN:
+  case RISCV::MINU:
+  case RISCV::MAX:
+  case RISCV::MAXU:
+    return true;
+  }
+
   return false;
 }
 
@@ -1381,6 +1417,14 @@ RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
     return RISCV::FADD_S;
   case RISCV::FSUB_D:
     return RISCV::FADD_D;
+  case RISCV::ADD:
+    return RISCV::SUB;
+  case RISCV::SUB:
+    return RISCV::ADD;
+  case RISCV::ADDW:
+    return RISCV::SUBW;
+  case RISCV::SUBW:
+    return RISCV::ADDW;
   }
 }
diff --git a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll
index 5fd8261..268e2a8 100644
--- a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll
+++ b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll
@@ -7,9 +7,9 @@ define i64 @addc_adde(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: addc_adde:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: add a1, a1, a3
 ; RV32I-NEXT: add a2, a0, a2
 ; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a0, a3, a0
 ; RV32I-NEXT: add a1, a1, a0
 ; RV32I-NEXT: mv a0, a2
 ; RV32I-NEXT: ret
@@ -21,8 +21,8 @@ define i64 @subc_sube(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: subc_sube:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: add a3, a3, a4
 ; RV32I-NEXT: sub a1, a1, a3
-; RV32I-NEXT: sub a1, a1, a4
 ; RV32I-NEXT: sub a0, a0, a2
 ; RV32I-NEXT: ret
   %1 = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll
index 053b987..6c300cb 100644
--- a/llvm/test/CodeGen/RISCV/addcarry.ll
+++ b/llvm/test/CodeGen/RISCV/addcarry.ll
@@ -18,12 +18,12 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind {
 ; RISCV32-NEXT: sltu a7, a4, a6
 ; RISCV32-NEXT: sltu a5, a6, a5
 ; RISCV32-NEXT: mulhu a6, a0, a3
-; RISCV32-NEXT: add a5, a6, a5
-; RISCV32-NEXT: mulhu a6, a1, a2
-; RISCV32-NEXT: add a5, a5, a6
+; RISCV32-NEXT: mulhu t0, a1, a2
+; RISCV32-NEXT: add a5, a5, t0
+; RISCV32-NEXT: add a5, a5, a7
+; RISCV32-NEXT: mul a7, a1, a3
 ; RISCV32-NEXT: add a5, a5, a7
-; RISCV32-NEXT: mul a6, a1, a3
-; RISCV32-NEXT: add a5, a5, a6
+; RISCV32-NEXT: add a5, a6, a5
 ; RISCV32-NEXT: bgez a1, .LBB0_2
 ; RISCV32-NEXT: # %bb.1:
 ; RISCV32-NEXT: sub a5, a5, a2
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index 4454af8..83b4f03 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -51,11 +51,11 @@ define i64 @add_mul_combine_accept_a3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 29
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: addi a0, a2, 1073
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_a3:
@@ -121,13 +121,13 @@ define i64 @add_mul_combine_accept_b3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 23
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 50
 ; RV32IMB-NEXT: addi a0, a0, 1119
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_accept_b3:
@@ -187,13 +187,13 @@ define i64 @add_mul_combine_reject_a3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 29
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 14
 ; RV32IMB-NEXT: addi a0, a0, -185
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a3:
@@ -253,13 +253,13 @@ define i64 @add_mul_combine_reject_c3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 73
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 18
 ; RV32IMB-NEXT: addi a0, a0, -728
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_c3:
@@ -318,14 +318,14 @@ define i64 @add_mul_combine_reject_d3(i64 %x) {
 ; RV32IMB-NEXT: mulhu a2, a0, a2
 ; RV32IMB-NEXT: sh1add a1, a1, a1
 ; RV32IMB-NEXT: slli a1, a1, 6
-; RV32IMB-NEXT: add a1, a2, a1
 ; RV32IMB-NEXT: sh1add a0, a0, a0
-; RV32IMB-NEXT: slli a2, a0, 6
+; RV32IMB-NEXT: slli a3, a0, 6
 ; RV32IMB-NEXT: lui a0, 47
 ; RV32IMB-NEXT: addi a0, a0, -512
-; RV32IMB-NEXT: add a0, a2, a0
-; RV32IMB-NEXT: sltu a2, a0, a2
-; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a0, a3, a0
+; RV32IMB-NEXT: sltu a3, a0, a3
+; RV32IMB-NEXT: add a1, a1, a3
+; RV32IMB-NEXT: add a1, a2, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_d3:
@@ -383,13 +383,13 @@ define i64 @add_mul_combine_reject_e3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 29
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 14
 ; RV32IMB-NEXT: addi a0, a0, -185
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_e3:
@@ -451,13 +451,13 @@ define i64 @add_mul_combine_reject_f3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 29
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 14
 ; RV32IMB-NEXT: addi a0, a0, -145
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_f3:
@@ -520,13 +520,13 @@ define i64 @add_mul_combine_reject_g3(i64 %x) {
 ; RV32IMB-NEXT: li a2, 73
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 2
 ; RV32IMB-NEXT: addi a0, a0, -882
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_g3:
@@ -622,13 +622,13 @@ define i64 @mul3000_add8990_c(i64 %x) {
 ; RV32IMB-NEXT: addi a2, a2, -1096
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 2
 ; RV32IMB-NEXT: addi a0, a0, 798
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: mul3000_add8990_c:
@@ -697,13 +697,13 @@ define i64 @mul3000_sub8990_c(i64 %x) {
 ; RV32IMB-NEXT: addi a2, a2, -1096
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 1048574
 ; RV32IMB-NEXT: addi a0, a0, -798
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
 ; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: add a1, a3, a1
 ; RV32IMB-NEXT: addi a1, a1, -1
 ; RV32IMB-NEXT: ret
 ;
@@ -773,14 +773,14 @@ define i64 @mulneg3000_add8990_c(i64 %x) {
 ; RV32IMB-NEXT: addi a2, a2, 1096
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: sub a3, a3, a0
-; RV32IMB-NEXT: add a1, a3, a1
+; RV32IMB-NEXT: sub a1, a0, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 2
 ; RV32IMB-NEXT: addi a0, a0, 798
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
-; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: sub a1, a1, a2
+; RV32IMB-NEXT: sub a1, a3, a1
 ; RV32IMB-NEXT: ret
 ;
 ; RV64IMB-LABEL: mulneg3000_add8990_c:
@@ -849,14 +849,14 @@ define i64 @mulneg3000_sub8990_c(i64 %x) {
 ; RV32IMB-NEXT: addi a2, a2, 1096
 ; RV32IMB-NEXT: mul a1, a1, a2
 ; RV32IMB-NEXT: mulhu a3, a0, a2
-; RV32IMB-NEXT: sub a3, a3, a0
-; RV32IMB-NEXT: add a1, a3, a1
+; RV32IMB-NEXT: sub a1, a0, a1
 ; RV32IMB-NEXT: mul a2, a0, a2
 ; RV32IMB-NEXT: lui a0, 1048574
 ; RV32IMB-NEXT: addi a0, a0, -798
 ; RV32IMB-NEXT: add a0, a2, a0
 ; RV32IMB-NEXT: sltu a2, a0, a2
-; RV32IMB-NEXT: add a1, a1, a2
+; RV32IMB-NEXT: sub a1, a1, a2
+; RV32IMB-NEXT: sub a1, a3, a1
 ; RV32IMB-NEXT: addi a1, a1, -1
 ; RV32IMB-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index 34bcb0b..d9916aa 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -172,9 +172,9 @@ define i64 @add(i64 %a, i64 %b) nounwind {
 ;
 ; RV32I-LABEL: add:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: add a1, a1, a3
 ; RV32I-NEXT: add a2, a0, a2
 ; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a0, a3, a0
 ; RV32I-NEXT: add a1, a1, a0
 ; RV32I-NEXT: mv a0, a2
 ; RV32I-NEXT: ret
@@ -191,8 +191,8 @@ define i64 @sub(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: sub:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: add a3, a3, a4
 ; RV32I-NEXT: sub a1, a1, a3
-; RV32I-NEXT: sub a1, a1, a4
 ; RV32I-NEXT: sub a0, a0, a2
 ; RV32I-NEXT: ret
   %1 = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
index 616b9fce..569bb8e3 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -64,7 +64,7 @@ define i32 @test_bswap_i32(i32 %a) nounwind {
 ; RV32I-NEXT: and a2, a0, a2
 ; RV32I-NEXT: slli a2, a2, 8
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: ret
 ;
@@ -79,7 +79,7 @@ define i32 @test_bswap_i32(i32 %a) nounwind {
 ; RV64I-NEXT: and a2, a0, a2
 ; RV64I-NEXT: slli a2, a2, 8
 ; RV64I-NEXT: slliw a0, a0, 24
-; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a1, a2, a1
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: ret
 ;
@@ -109,7 +109,7 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV32I-NEXT: and a4, a1, a3
 ; RV32I-NEXT: slli a4, a4, 8
 ; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a2, a4, a2
 ; RV32I-NEXT: or a2, a1, a2
 ; RV32I-NEXT: srli a1, a0, 8
 ; RV32I-NEXT: and a1, a1, a3
@@ -118,7 +118,7 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV32I-NEXT: and a3, a0, a3
 ; RV32I-NEXT: slli a3, a3, 8
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a1, a3, a1
 ; RV32I-NEXT: or a1, a0, a1
 ; RV32I-NEXT: mv a0, a2
 ; RV32I-NEXT: ret
@@ -137,8 +137,8 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT: srli a5, a0, 8
 ; RV64I-NEXT: srliw a5, a5, 24
 ; RV64I-NEXT: slli a5, a5, 24
-; RV64I-NEXT: or a3, a5, a3
 ; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: or a1, a5, a1
 ; RV64I-NEXT: and a4, a0, a4
 ; RV64I-NEXT: slli a4, a4, 24
 ; RV64I-NEXT: srliw a3, a0, 24
@@ -147,8 +147,8 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT: and a2, a0, a2
 ; RV64I-NEXT: slli a2, a2, 40
 ; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a1, a2, a1
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: ret
 ;
@@ -401,7 +401,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
 ; RV32I-NEXT: and a2, a0, a2
 ; RV32I-NEXT: slli a2, a2, 8
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: srli a1, a0, 4
 ; RV32I-NEXT: lui a2, 61681
@@ -437,7 +437,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
 ; RV64I-NEXT: and a2, a0, a2
 ; RV64I-NEXT: slli a2, a2, 8
 ; RV64I-NEXT: slliw a0, a0, 24
-; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a1, a2, a1
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: srli a1, a0, 4
 ; RV64I-NEXT: lui a2, 61681
@@ -545,7 +545,7 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV32I-NEXT: and a4, a1, a3
 ; RV32I-NEXT: slli a4, a4, 8
 ; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a2, a4, a2
 ; RV32I-NEXT: or a1, a1, a2
 ; RV32I-NEXT: srli a2, a1, 4
 ; RV32I-NEXT: lui a4, 61681
@@ -575,7 +575,7 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV32I-NEXT: and a3, a0, a3
 ; RV32I-NEXT: slli a3, a3, 8
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a1, a3, a1
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: srli a1, a0, 4
 ; RV32I-NEXT: and a1, a1, a4
@@ -609,8 +609,8 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT: srli a5, a0, 8
 ; RV64I-NEXT: srliw a5, a5, 24
 ; RV64I-NEXT: slli a5, a5, 24
-; RV64I-NEXT: or a3, a5, a3
 ; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: or a1, a5, a1
 ; RV64I-NEXT: and a4, a0, a4
 ; RV64I-NEXT: slli a4, a4, 24
 ; RV64I-NEXT: srliw a3, a0, 24
@@ -619,14 +619,14 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT: and a2, a0, a2
 ; RV64I-NEXT: slli a2, a2, 40
 ; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
-; RV64I-NEXT: ld a2, %lo(.LCPI6_0)(a2)
-; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: lui a3, %hi(.LCPI6_0)
+; RV64I-NEXT: ld a3, %lo(.LCPI6_0)(a3)
+; RV64I-NEXT: or a1, a2, a1
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: and a0, a0, a3
 ; RV64I-NEXT: lui a2, %hi(.LCPI6_1)
 ; RV64I-NEXT: ld a2, %lo(.LCPI6_1)(a2)
 ; RV64I-NEXT: slli a0, a0, 4
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
index ef9fb40..8c655f5 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
@@ -94,14 +94,14 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %
 ; RV32I-FPELIM-LABEL: callee_aligned_stack:
 ; RV32I-FPELIM: # %bb.0:
 ; RV32I-FPELIM-NEXT: lw a0, 0(a2)
-; RV32I-FPELIM-NEXT: lw a1, 20(sp)
-; RV32I-FPELIM-NEXT: lw a2, 0(sp)
-; RV32I-FPELIM-NEXT: lw a3, 8(sp)
-; RV32I-FPELIM-NEXT: lw a4, 16(sp)
-; RV32I-FPELIM-NEXT: add a0, a0, a7
-; RV32I-FPELIM-NEXT: add a0, a0, a2
-; RV32I-FPELIM-NEXT: add a0, a0, a3
-; RV32I-FPELIM-NEXT: add a0, a0, a4
+; RV32I-FPELIM-NEXT: lw a1, 0(sp)
+; RV32I-FPELIM-NEXT: lw a2, 8(sp)
+; RV32I-FPELIM-NEXT: lw a3, 16(sp)
+; RV32I-FPELIM-NEXT: lw a4, 20(sp)
+; RV32I-FPELIM-NEXT: add a1, a7, a1
+; RV32I-FPELIM-NEXT: add a1, a1, a2
+; RV32I-FPELIM-NEXT: add a1, a1, a3
+; RV32I-FPELIM-NEXT: add a1, a1, a4
 ; RV32I-FPELIM-NEXT: add a0, a0, a1
 ; RV32I-FPELIM-NEXT: ret
 ;
@@ -112,14 +112,14 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %
 ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT: addi s0, sp, 16
 ; RV32I-WITHFP-NEXT: lw a0, 0(a2)
-; RV32I-WITHFP-NEXT: lw a1, 20(s0)
-; RV32I-WITHFP-NEXT: lw a2, 0(s0)
-; RV32I-WITHFP-NEXT: lw a3, 8(s0)
-; RV32I-WITHFP-NEXT: lw a4, 16(s0)
-; RV32I-WITHFP-NEXT: add a0, a0, a7
-; RV32I-WITHFP-NEXT: add a0, a0, a2
-; RV32I-WITHFP-NEXT: add a0, a0, a3
-; RV32I-WITHFP-NEXT: add a0, a0, a4
+; RV32I-WITHFP-NEXT: lw a1, 0(s0)
+; RV32I-WITHFP-NEXT: lw a2, 8(s0)
+; RV32I-WITHFP-NEXT: lw a3, 16(s0)
+; RV32I-WITHFP-NEXT: lw a4, 20(s0)
+; RV32I-WITHFP-NEXT: add a1, a7, a1
+; RV32I-WITHFP-NEXT: add a1, a1, a2
+; RV32I-WITHFP-NEXT: add a1, a1, a3
+; RV32I-WITHFP-NEXT: add a1, a1, a4
 ; RV32I-WITHFP-NEXT: add a0, a0, a1
 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index adff9c7..3a5d2c4 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -87,16 +87,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i
 ; RV32I-FPELIM-NEXT: andi a0, a0, 255
 ; RV32I-FPELIM-NEXT: slli a1, a1, 16
 ; RV32I-FPELIM-NEXT: srli a1, a1, 16
-; RV32I-FPELIM-NEXT: add a0, a0, a1
-; RV32I-FPELIM-NEXT: add a0, a0, a2
-; RV32I-FPELIM-NEXT: xor a1, a4, t1
-; RV32I-FPELIM-NEXT: xor a2, a3, a7
-; RV32I-FPELIM-NEXT: or a1, a2, a1
-; RV32I-FPELIM-NEXT: seqz a1, a1
-; RV32I-FPELIM-NEXT: add a0, a1, a0
+; RV32I-FPELIM-NEXT: add a1, a1, a2
+; RV32I-FPELIM-NEXT: xor a2, a4, t1
+; RV32I-FPELIM-NEXT: xor a3, a3, a7
+; RV32I-FPELIM-NEXT: or a2, a3, a2
+; RV32I-FPELIM-NEXT: seqz a2, a2
+; RV32I-FPELIM-NEXT: add a1, a2, a1
 ; RV32I-FPELIM-NEXT: add a0, a0, a5
 ; RV32I-FPELIM-NEXT: add a0, a0, a6
 ; RV32I-FPELIM-NEXT: add a0, a0, t0
+; RV32I-FPELIM-NEXT: add a0, a1, a0
 ; RV32I-FPELIM-NEXT: ret
 ;
 ; RV32I-WITHFP-LABEL: callee_many_scalars:
@@ -110,16 +110,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i
 ; RV32I-WITHFP-NEXT: andi a0, a0, 255
 ; RV32I-WITHFP-NEXT: slli a1, a1, 16
 ; RV32I-WITHFP-NEXT: srli a1, a1, 16
-; RV32I-WITHFP-NEXT: add a0, a0, a1
-; RV32I-WITHFP-NEXT: add a0, a0, a2
-; RV32I-WITHFP-NEXT: xor a1, a4, t1
-; RV32I-WITHFP-NEXT: xor a2, a3, a7
-; RV32I-WITHFP-NEXT: or a1, a2, a1
-; RV32I-WITHFP-NEXT: seqz a1, a1
-; RV32I-WITHFP-NEXT: add a0, a1, a0
+; RV32I-WITHFP-NEXT: add a1, a1, a2
+; RV32I-WITHFP-NEXT: xor a2, a4, t1
+; RV32I-WITHFP-NEXT: xor a3, a3, a7
+; RV32I-WITHFP-NEXT: or a2, a3, a2
+; RV32I-WITHFP-NEXT: seqz a2, a2
+; RV32I-WITHFP-NEXT: add a1, a2, a1
 ; RV32I-WITHFP-NEXT: add a0, a0, a5
 ; RV32I-WITHFP-NEXT: add a0, a0, a6
 ; RV32I-WITHFP-NEXT: add a0, a0, t0
+; RV32I-WITHFP-NEXT: add a0, a1, a0
 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT: addi sp, sp, 16
@@ -203,8 +203,8 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
 ; RV32I-FPELIM-NEXT: or a4, a4, a5
 ; RV32I-FPELIM-NEXT: xor a0, a0, a1
 ; RV32I-FPELIM-NEXT: xor a2, a3, a2
-; RV32I-FPELIM-NEXT: or a0, a2, a0
 ; RV32I-FPELIM-NEXT: or a0, a0, a4
+; RV32I-FPELIM-NEXT: or a0, a2, a0
 ; RV32I-FPELIM-NEXT: seqz a0, a0
 ; RV32I-FPELIM-NEXT: ret
 ;
@@ -227,8 +227,8 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
 ; RV32I-WITHFP-NEXT: or a4, a4, a5
 ; RV32I-WITHFP-NEXT: xor a0, a0, a1
 ; RV32I-WITHFP-NEXT: xor a2, a3, a2
-; RV32I-WITHFP-NEXT: or a0, a2, a0
 ; RV32I-WITHFP-NEXT: or a0, a0, a4
+; RV32I-WITHFP-NEXT: or a0, a2, a0
 ; RV32I-WITHFP-NEXT: seqz a0, a0
 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -310,8 +310,8 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
 ; RV32I-FPELIM-NEXT: or a3, a3, a4
 ; RV32I-FPELIM-NEXT: xor a0, a7, a0
 ; RV32I-FPELIM-NEXT: xor a1, a2, a1
-; RV32I-FPELIM-NEXT: or a0, a1, a0
 ; RV32I-FPELIM-NEXT: or a0, a0, a3
+; RV32I-FPELIM-NEXT: or a0, a1, a0
 ; RV32I-FPELIM-NEXT: seqz a0, a0
 ; RV32I-FPELIM-NEXT: ret
 ;
@@ -335,8 +335,8 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
 ; RV32I-WITHFP-NEXT: or a3, a3, a4
 ; RV32I-WITHFP-NEXT: xor a0, a7, a0
 ; RV32I-WITHFP-NEXT: xor a1, a2, a1
-; RV32I-WITHFP-NEXT: or a0, a1, a0
 ; RV32I-WITHFP-NEXT: or a0, a0, a3
+; RV32I-WITHFP-NEXT: or a0, a1, a0
 ; RV32I-WITHFP-NEXT: seqz a0, a0
 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -614,14 +614,14 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %
 ; RV32I-FPELIM-LABEL: callee_aligned_stack:
 ; RV32I-FPELIM: # %bb.0:
 ; RV32I-FPELIM-NEXT: lw a0, 0(a2)
-; RV32I-FPELIM-NEXT: lw a1, 20(sp)
-; RV32I-FPELIM-NEXT: lw a2, 0(sp)
-; RV32I-FPELIM-NEXT: lw a3, 8(sp)
-; RV32I-FPELIM-NEXT: lw a4, 16(sp)
-; RV32I-FPELIM-NEXT: add a0, a0, a7
-; RV32I-FPELIM-NEXT: add a0, a0, a2
-; RV32I-FPELIM-NEXT: add a0, a0, a3
-; RV32I-FPELIM-NEXT: add a0, a0, a4
+; RV32I-FPELIM-NEXT: lw a1, 0(sp)
+; RV32I-FPELIM-NEXT: lw a2, 8(sp)
+; RV32I-FPELIM-NEXT: lw a3, 16(sp)
+; RV32I-FPELIM-NEXT: lw a4, 20(sp)
+; RV32I-FPELIM-NEXT: add a1, a7, a1
+; RV32I-FPELIM-NEXT: add a1, a1, a2
+; RV32I-FPELIM-NEXT: add a1, a1, a3
+; RV32I-FPELIM-NEXT: add a1, a1, a4
 ; RV32I-FPELIM-NEXT: add a0, a0, a1
 ; RV32I-FPELIM-NEXT: ret
 ;
@@ -632,14 +632,14 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %
 ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT: addi s0, sp, 16
 ; RV32I-WITHFP-NEXT: lw a0, 0(a2)
-; RV32I-WITHFP-NEXT: lw a1, 20(s0)
-; RV32I-WITHFP-NEXT: lw a2, 0(s0)
-; RV32I-WITHFP-NEXT: lw a3, 8(s0)
-; RV32I-WITHFP-NEXT: lw a4, 16(s0)
-; RV32I-WITHFP-NEXT: add a0, a0, a7
-; RV32I-WITHFP-NEXT: add a0, a0, a2
-; RV32I-WITHFP-NEXT: add a0, a0, a3
-; RV32I-WITHFP-NEXT: add a0, a0, a4
+; RV32I-WITHFP-NEXT: lw a1, 0(s0)
+; RV32I-WITHFP-NEXT: lw a2, 8(s0)
+; RV32I-WITHFP-NEXT: lw a3, 16(s0)
+; RV32I-WITHFP-NEXT: lw a4, 20(s0)
+; RV32I-WITHFP-NEXT: add a1, a7, a1
+; RV32I-WITHFP-NEXT: add a1, a1, a2
+; RV32I-WITHFP-NEXT: add a1, a1, a3
+; RV32I-WITHFP-NEXT: add a1, a1, a4
 ; RV32I-WITHFP-NEXT: add a0, a0, a1
 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
index 858f4a1..185155e 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -53,16 +53,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i128 %d, i32 %e, i32 %f,
 ; RV64I-NEXT: andi a0, a0, 255
 ; RV64I-NEXT: slli a1, a1, 48
 ; RV64I-NEXT: srli a1, a1, 48
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: xor a1, a4, t1
-; RV64I-NEXT: xor a2, a3, a7
-; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: seqz a1, a1
-; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: xor a2, a4, t1
+; RV64I-NEXT: xor a3, a3, a7
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: seqz a2, a2
+; RV64I-NEXT: add a1, a2, a1
 ; RV64I-NEXT: add a0, a0, a5
 ; RV64I-NEXT: add a0, a0, a6
-; RV64I-NEXT: addw a0, a0, t0
+; RV64I-NEXT: add a0, a0, t0
+; RV64I-NEXT: addw a0, a1, a0
 ; RV64I-NEXT: ret
   %a_ext = zext i8 %a to i32
   %b_ext = zext i16 %b to i32
@@ -119,8 +119,8 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
 ; RV64I-NEXT: or a4, a4, a5
 ; RV64I-NEXT: xor a0, a0, a1
 ; RV64I-NEXT: xor a2, a3, a2
-; RV64I-NEXT: or a0, a2, a0
 ; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a2, a0
 ; RV64I-NEXT: seqz a0, a0
 ; RV64I-NEXT: ret
   %1 = icmp eq i256 %a, %b
@@ -174,8 +174,8 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
 ; RV64I-NEXT: or a3, a3, a4
 ; RV64I-NEXT: xor a0, a7, a0
 ; RV64I-NEXT: xor a1, a2, a1
-; RV64I-NEXT: or a0, a1, a0
 ; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a1, a0
 ; RV64I-NEXT: seqz a0, a0
 ; RV64I-NEXT: ret
   %1 = icmp eq i256 %h, %j
@@ -328,15 +328,15 @@ define i64 @callee_aligned_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i128 %f
 ; should only be 8-byte aligned
 ; RV64I-LABEL: callee_aligned_stack:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: ld a0, 40(sp)
-; RV64I-NEXT: ld a1, 0(sp)
-; RV64I-NEXT: ld a2, 16(sp)
-; RV64I-NEXT: ld a3, 32(sp)
-; RV64I-NEXT: add a5, a5, a7
-; RV64I-NEXT: add a1, a5, a1
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: add a1, a1, a3
-; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld a0, 0(sp)
+; RV64I-NEXT: ld a1, 16(sp)
+; RV64I-NEXT: ld a2, 32(sp)
+; RV64I-NEXT: ld a3, 40(sp)
+; RV64I-NEXT: add a0, a7, a0
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: add a0, a0, a2
+; RV64I-NEXT: add a0, a0, a3
+; RV64I-NEXT: add a0, a5, a0
 ; RV64I-NEXT: ret
   %f_trunc = trunc i128 %f to i64
   %1 = add i64 %f_trunc, %g
diff --git a/llvm/test/CodeGen/RISCV/compress.ll b/llvm/test/CodeGen/RISCV/compress.ll
index d1eca38..479b7e5 100644
--- a/llvm/test/CodeGen/RISCV/compress.ll
+++ b/llvm/test/CodeGen/RISCV/compress.ll
@@ -35,8 +35,8 @@ define i32 @simple_arith(i32 %a, i32 %b) #0 {
 ; RV32IC-NEXT: c.andi a2, 11
 ; RV32IC-NEXT: c.slli a2, 7
 ; RV32IC-NEXT: c.srai a1, 9
-; RV32IC-NEXT: c.add a1, a2
 ; RV32IC-NEXT: sub a0, a1, a0
+; RV32IC-NEXT: c.add a0, a2
 ; RV32IC-NEXT: c.jr ra
   %1 = add i32 %a, 1
   %2 = and i32 %1, 11
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
index 64f49c8..d9bb35d 100644
--- a/llvm/test/CodeGen/RISCV/copysign-casts.ll
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -447,8 +447,8 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
 ; RV32IF-NEXT: srli a1, a1, 16
 ; RV32IF-NEXT: slli a0, a0, 17
 ; RV32IF-NEXT: srli a0, a0, 17
-; RV32IF-NEXT: or a0, a0, a1
-; RV32IF-NEXT: lui a1, 1048560
+; RV32IF-NEXT: lui a2, 1048560
+; RV32IF-NEXT: or a1, a1, a2
 ; RV32IF-NEXT: or a0, a0, a1
 ; RV32IF-NEXT: fmv.w.x fa0, a0
 ; RV32IF-NEXT: ret
@@ -462,8 +462,8 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
 ; RV32IFD-NEXT: srli a1, a1, 16
 ; RV32IFD-NEXT: slli a0, a0, 17
 ; RV32IFD-NEXT: srli a0, a0, 17
-; RV32IFD-NEXT: or a0, a0, a1
-; RV32IFD-NEXT: lui a1, 1048560
+; RV32IFD-NEXT: lui a2, 1048560
+; RV32IFD-NEXT: or a1, a1, a2
 ; RV32IFD-NEXT: or a0, a0, a1
 ; RV32IFD-NEXT: fmv.w.x fa0, a0
 ; RV32IFD-NEXT: ret
@@ -477,8 +477,8 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
 ; RV64IFD-NEXT: srli a1, a1, 16
 ; RV64IFD-NEXT: slli a0, a0, 49
 ; RV64IFD-NEXT: srli a0, a0, 49
-; RV64IFD-NEXT: or a0, a0, a1
-; RV64IFD-NEXT: lui a1, 1048560
+; RV64IFD-NEXT: lui a2, 1048560
+; RV64IFD-NEXT: or a1, a1, a2
 ; RV64IFD-NEXT: or a0, a0, a1
 ; RV64IFD-NEXT: fmv.w.x fa0, a0
 ; RV64IFD-NEXT: ret
 ;
@@ -581,8 +581,8 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
 ; RV32IF-NEXT: srli a1, a1, 16
 ; RV32IF-NEXT: slli a0, a0, 17
 ; RV32IF-NEXT: srli a0, a0, 17
-; RV32IF-NEXT: or a0, a0, a1
-; RV32IF-NEXT: lui a1, 1048560
+; RV32IF-NEXT: lui a2, 1048560
+; RV32IF-NEXT: or a1, a1, a2
 ; RV32IF-NEXT: or a0, a0, a1
 ; RV32IF-NEXT: fmv.w.x fa0, a0
 ; RV32IF-NEXT: ret
@@ -598,9 +598,9 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
 ; RV32IFD-NEXT: srli a0, a0, 16
 ; RV32IFD-NEXT: slli a1, a1, 17
 ; RV32IFD-NEXT: srli a1, a1, 17
+; RV32IFD-NEXT: lui a2, 1048560
+; RV32IFD-NEXT: or a0, a0, a2
 ; RV32IFD-NEXT: or a0, a1, a0
-; RV32IFD-NEXT: lui a1, 1048560
-; RV32IFD-NEXT: or a0, a0, a1
 ; RV32IFD-NEXT: fmv.w.x fa0, a0
 ; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
@@ -614,9 +614,9 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
 ; RV64IFD-NEXT: srli a0, a0, 63
 ; RV64IFD-NEXT: slli a0, a0, 63
 ; RV64IFD-NEXT: srli a0, a0, 48
+; RV64IFD-NEXT: lui a2, 1048560
+; RV64IFD-NEXT: or a0, a0, a2
 ; RV64IFD-NEXT: or a0, a1, a0
-; RV64IFD-NEXT: lui a1, 1048560
-; RV64IFD-NEXT: or a0, a0, a1
 ; RV64IFD-NEXT: fmv.w.x fa0, a0
 ; RV64IFD-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index da30456..6ba8e62 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -80,11 +80,11 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind {
 ; RV32-NEXT: addi a3, a3, -820
 ; RV32-NEXT: mul a3, a5, a3
 ; RV32-NEXT: mulhu a6, a5, a4
-; RV32-NEXT: add a3, a6, a3
 ; RV32-NEXT: sltu a0, a0, a2
 ; RV32-NEXT: sub a1, a1, a0
-; RV32-NEXT: mul a1, a1, a4
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: mul a0, a1, a4
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: add a1, a6, a0
 ; RV32-NEXT: mul a0, a5, a4
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll
index ba62163..d5a8036 100644
--- a/llvm/test/CodeGen/RISCV/div-pow2.ll
+++ b/llvm/test/CodeGen/RISCV/div-pow2.ll
@@ -213,8 +213,8 @@ define i64 @sdiv64_pow2_negative_2(i64 %a) {
 ; RV32I-NEXT: neg a0, a3
 ; RV32I-NEXT: snez a2, a3
 ; RV32I-NEXT: srai a1, a1, 1
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: sdiv64_pow2_negative_2:
@@ -269,8 +269,8 @@ define i64 @sdiv64_pow2_negative_2048(i64 %a) {
 ; RV32I-NEXT: neg a0, a3
 ; RV32I-NEXT: snez a2, a3
 ; RV32I-NEXT: srai a1, a1, 11
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: sdiv64_pow2_negative_2048:
@@ -326,8 +326,8 @@ define i64 @sdiv64_pow2_negative_4096(i64 %a) {
 ; RV32I-NEXT: neg a0, a3
 ; RV32I-NEXT: snez a2, a3
 ; RV32I-NEXT: srai a1, a1, 12
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: sdiv64_pow2_negative_4096:
@@ -383,8 +383,8 @@ define i64 @sdiv64_pow2_negative_65536(i64 %a) {
 ; RV32I-NEXT: neg a0, a3
 ; RV32I-NEXT: snez a2, a3
 ; RV32I-NEXT: srai a1, a1, 16
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: sdiv64_pow2_negative_65536:
@@ -404,11 +404,11 @@ define i64 @sdiv64_pow2_8589934592(i64 %a) {
 ; RV32I-LABEL: sdiv64_pow2_8589934592:
 ; RV32I: # %bb.0: # %entry
 ; RV32I-NEXT: srli a2, a1, 31
-; RV32I-NEXT: add a2, a1, a2
-; RV32I-NEXT: srai a1, a1, 31
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: sltu a0, a1, a0
-; RV32I-NEXT: add a1, a2, a0
+; RV32I-NEXT: srai a3, a1, 31
+; RV32I-NEXT: add a3, a0, a3
+; RV32I-NEXT: sltu a0, a3, a0
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
 ; RV32I-NEXT: srai a0, a1, 1
 ; RV32I-NEXT: srai a1, a1, 31
 ; RV32I-NEXT: ret
@@ -429,16 +429,16 @@ define i64 @sdiv64_pow2_negative_8589934592(i64 %a) {
 ; RV32I-LABEL: sdiv64_pow2_negative_8589934592:
 ; RV32I: # %bb.0: # %entry
 ; RV32I-NEXT: srli a2, a1, 31
-; RV32I-NEXT: add a2, a1, a2
-; RV32I-NEXT: srai a1, a1, 31
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: srai a3, a1, 31
+; RV32I-NEXT: add a3, a0, a3
+; RV32I-NEXT: sltu a0, a3, a0
 ; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: add a0, a1, a0
 ; RV32I-NEXT: srai a1, a0, 31
 ; RV32I-NEXT: srai a0, a0, 1
 ; RV32I-NEXT: snez a2, a0
-; RV32I-NEXT: add a1, a1, a2
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
 ; RV32I-NEXT: neg a0, a0
 ; RV32I-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index 1466fb4..9963d04 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -195,11 +195,11 @@ define i64 @udiv64_constant(i64 %a) nounwind {
 ; RV32IM-NEXT: addi a3, a3, -820
 ; RV32IM-NEXT: mul a3, a5, a3
 ; RV32IM-NEXT: mulhu a6, a5, a4
-; RV32IM-NEXT: add a3, a6, a3
 ; RV32IM-NEXT: sltu a0, a0, a2
 ; RV32IM-NEXT: sub a1, a1, a0
-; RV32IM-NEXT: mul a1, a1, a4
-; RV32IM-NEXT: add a1, a3, a1
+; RV32IM-NEXT: mul a0, a1, a4
+; RV32IM-NEXT: add a0, a3, a0
+; RV32IM-NEXT: add a1, a6, a0
 ; RV32IM-NEXT: mul a0, a5, a4
 ; RV32IM-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 7eb7e14..68a7ec4 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -3227,14 +3227,14 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT: or a4, a1, a0
 ; RV32IF-NEXT: snez a4, a4
 ; RV32IF-NEXT: addi a4, a4, -1
-; RV32IF-NEXT: and a3, a4, a3
 ; RV32IF-NEXT: xori a0, a0, 1
 ; RV32IF-NEXT: or a0, a0, a1
 ; RV32IF-NEXT: seqz a0, a0
 ; RV32IF-NEXT: addi a1, a0, -1
 ; RV32IF-NEXT: and a0, a1, a3
-; RV32IF-NEXT: and a2, a4, a2
+; RV32IF-NEXT: and a0, a0, a4
 ; RV32IF-NEXT: and a1, a1, a2
+; RV32IF-NEXT: and a1, a1, a4
 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 32
 ; RV32IF-NEXT: ret
@@ -3248,11 +3248,11 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV64-NEXT: call __fixunsdfti@plt
 ; RV64-NEXT: snez a2, a1
 ; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: and a0, a2, a0
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: seqz a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a2
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -3272,14 +3272,14 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT: or a4, a1, a0
 ; RV32IFD-NEXT: snez a4, a4
 ; RV32IFD-NEXT: addi a4, a4, -1
-; RV32IFD-NEXT: and a3, a4, a3
 ; RV32IFD-NEXT: xori a0, a0, 1
 ; RV32IFD-NEXT: or a0, a0, a1
 ; RV32IFD-NEXT: seqz a0, a0
 ; RV32IFD-NEXT: addi a1, a0, -1
 ; RV32IFD-NEXT: and a0, a1, a3
-; RV32IFD-NEXT: and a2, a4, a2
+; RV32IFD-NEXT: and a0, a0, a4
 ; RV32IFD-NEXT: and a1, a1, a2
+; RV32IFD-NEXT: and a1, a1, a4
 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT: addi sp, sp, 32
 ; RV32IFD-NEXT: ret
@@ -3590,14 +3590,14 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT: or a4, a1, a0
 ; RV32-NEXT: snez a4, a4
 ; RV32-NEXT: addi a4, a4, -1
-; RV32-NEXT: and a3, a4, a3
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: seqz a0, a0
 ; RV32-NEXT: addi a1, a0, -1
 ; RV32-NEXT: and a0, a1, a3
-; RV32-NEXT: and a2, a4, a2
+; RV32-NEXT: and a0, a0, a4
 ; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: and a1, a1, a4
 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
@@ -3611,11 +3611,11 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV64-NEXT: call __fixunssfti@plt
 ; RV64-NEXT: snez a2, a1
 ; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: and a0, a2, a0
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: seqz a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a2
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -3901,14 +3901,14 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV32-NEXT: or a4, a1, a0
 ; RV32-NEXT: snez a4, a4
 ; RV32-NEXT: addi a4, a4, -1
-; RV32-NEXT: and a3, a4, a3
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: seqz a0, a0
 ; RV32-NEXT: addi a1, a0, -1
 ; RV32-NEXT: and a0, a1, a3
-; RV32-NEXT: and a2, a4, a2
+; RV32-NEXT: and a0, a0, a4
 ; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: and a1, a1, a4
 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
@@ -3924,11 +3924,11 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV64-NEXT: call __fixunssfti@plt
 ; RV64-NEXT: snez a2, a1
 ; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: and a0, a2, a0
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: seqz a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a2
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index 233b2e6..e27ac74 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -5586,17 +5586,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NOV-NEXT: call __fixunsdfti@plt
 ; CHECK-NOV-NEXT: snez a2, a1
 ; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: seqz a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: and a0, a0, a2
 ; CHECK-NOV-NEXT: snez a1, s1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, s0
 ; CHECK-NOV-NEXT: addi s1, s1, -1
 ; CHECK-NOV-NEXT: seqz a2, s1
 ; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a2, a2, s0
 ; CHECK-NOV-NEXT: and a1, a2, a1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -5634,18 +5634,18 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-V-NEXT: call __fixunsdfti@plt
 ; CHECK-V-NEXT: snez a2, s1
 ; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s0
 ; CHECK-V-NEXT: addi s1, s1, -1
 ; CHECK-V-NEXT: seqz a3, s1
 ; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a3, a3, s0
 ; CHECK-V-NEXT: and a2, a3, a2
 ; CHECK-V-NEXT: snez a3, a1
 ; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a0, a3, a0
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: seqz a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: and a0, a0, a3
 ; CHECK-V-NEXT: sd a0, 24(sp)
 ; CHECK-V-NEXT: sd a2, 32(sp)
 ; CHECK-V-NEXT: addi a0, sp, 24
@@ -6076,17 +6076,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NOV-NEXT: call __fixunssfti@plt
 ; CHECK-NOV-NEXT: snez a2, a1
 ; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: seqz a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: and a0, a0, a2
 ; CHECK-NOV-NEXT: snez a1, s1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, s0
 ; CHECK-NOV-NEXT: addi s1, s1, -1
 ; CHECK-NOV-NEXT: seqz a2, s1
 ; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a2, a2, s0
 ; CHECK-NOV-NEXT: and a1, a2, a1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -6124,18 +6124,18 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-V-NEXT: call __fixunssfti@plt
 ; CHECK-V-NEXT: snez a2, s1
 ; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s0
 ; CHECK-V-NEXT: addi s1, s1, -1
 ; CHECK-V-NEXT: seqz a3, s1
 ; CHECK-V-NEXT: addi a3, a3, -1
+; CHECK-V-NEXT: and a3, a3, s0
 ; CHECK-V-NEXT: and a2, a3, a2
 ; CHECK-V-NEXT: snez a3, a1
 ; CHECK-V-NEXT: addi a3, a3, -1
-; CHECK-V-NEXT: and a0, a3, a0
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: seqz a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: and a0, a0, a3
 ; CHECK-V-NEXT: sd a0, 24(sp)
 ; CHECK-V-NEXT: sd a2, 32(sp)
 ; CHECK-V-NEXT: addi a0, sp, 24
@@ -6561,17 +6561,17 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NOV-NEXT: call __fixunssfti@plt
 ; CHECK-NOV-NEXT: snez a2, a1
 ; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a0, a2, a0
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: seqz a1, a1
 ; CHECK-NOV-NEXT: addi a1, a1, -1
 ; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: and a0, a0, a2
 ; CHECK-NOV-NEXT: snez a1, s2
 ; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, s1
 ; CHECK-NOV-NEXT: addi s2, s2, -1
 ; CHECK-NOV-NEXT: seqz a2, s2
 ; CHECK-NOV-NEXT: addi a2, a2, -1
+; CHECK-NOV-NEXT: and a2, a2, s1
 ; CHECK-NOV-NEXT: and a1, a2, a1
 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -6603,17 +6603,17 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-V-NEXT: call __fixunssfti@plt
 ; CHECK-V-NEXT: snez a2, a1
 ; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a0, a2, a0
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: seqz a1, a1
 ; CHECK-V-NEXT: addi a1, a1, -1
 ; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: and a0, a0, a2
 ; CHECK-V-NEXT: snez a1, s2
 ; CHECK-V-NEXT: addi a1, a1, -1
-; CHECK-V-NEXT: and a1, a1, s1
 ; CHECK-V-NEXT: addi s2, s2, -1
 ; CHECK-V-NEXT: seqz a2, s2
 ; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s1
 ; CHECK-V-NEXT: and a1, a2, a1
 ; CHECK-V-NEXT: sd a1, 8(sp)
 ; CHECK-V-NEXT: sd a0, 0(sp)
diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll
index 0a202e1..9744b83 100644
--- a/llvm/test/CodeGen/RISCV/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/iabs.ll
@@ -317,8 +317,8 @@ define i128 @abs128(i128 %x) {
 ; RV32I-NEXT: sltu t0, a7, a6
 ; RV32I-NEXT: snez a1, a1
 ; RV32I-NEXT: add a1, a4, a1
-; RV32I-NEXT: add a1, a1, t0
-; RV32I-NEXT: neg a4, a1
+; RV32I-NEXT: neg a4, t0
+; RV32I-NEXT: sub a4, a4, a1
 ; RV32I-NEXT: sub a1, a7, a6
 ; RV32I-NEXT: add a2, a2, a5
 ; RV32I-NEXT: neg a2, a2
@@ -348,8 +348,8 @@ define i128 @abs128(i128 %x) {
 ; RV32ZBB-NEXT: sltu t0, a7, a6
 ; RV32ZBB-NEXT: snez a1, a1
 ; RV32ZBB-NEXT: add a1, a4, a1
-; RV32ZBB-NEXT: add a1, a1, t0
-; RV32ZBB-NEXT: neg a4, a1
+; RV32ZBB-NEXT: neg a4, t0
+; RV32ZBB-NEXT: sub a4, a4, a1
 ; RV32ZBB-NEXT: sub a1, a7, a6
 ; RV32ZBB-NEXT: add a2, a2, a5
 ; RV32ZBB-NEXT: neg a2, a2
@@ -405,8 +405,8 @@ define i128 @select_abs128(i128 %x) {
 ; RV32I-NEXT: sltu t0, a7, a6
 ; RV32I-NEXT: snez a1, a1
 ; RV32I-NEXT: add a1, a4, a1
-; RV32I-NEXT: add a1, a1, t0
-; RV32I-NEXT: neg a4, a1
+; RV32I-NEXT: neg a4, t0
+; RV32I-NEXT: sub a4, a4, a1
 ; RV32I-NEXT: sub a1, a7, a6
 ; RV32I-NEXT: add a2, a2, a5
 ; RV32I-NEXT: neg a2, a2
@@ -436,8 +436,8 @@ define i128 @select_abs128(i128 %x) {
 ; RV32ZBB-NEXT: sltu t0, a7, a6
 ; RV32ZBB-NEXT: snez a1, a1
 ; RV32ZBB-NEXT: add a1, a4, a1
-; RV32ZBB-NEXT: add a1, a1, t0
-; RV32ZBB-NEXT: neg a4, a1
+; RV32ZBB-NEXT: neg a4, t0
+; RV32ZBB-NEXT: sub a4, a4, a1
 ; RV32ZBB-NEXT: sub a1, a7, a6
 ; RV32ZBB-NEXT: add a2, a2, a5
 ; RV32ZBB-NEXT: neg a2, a2
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 5555104..2ddf94b 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs -mcpu=sifive-u74 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zbb -verify-machineinstrs -mcpu=sifive-u74 \
 ; RUN:   -O1 -riscv-enable-machine-combiner=true < %s | \
 ; RUN:   FileCheck %s
@@ -393,3 +393,614 @@ define double @test_reassoc_fsub12(double %a0, double %a1, double %a2, double %a
   %t2 = fsub nsz reassoc double %a3, %t1
   ret double %t2
 }
+
+define i8 @test_reassoc_add_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_add_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i8 %a0, %a1
+  %t1 = add i8 %t0, %a2
+  %t2 = add i8 %t1, %a3
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_add_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_add_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i16 %a0, %a1
+  %t1 = add i16 %t0, %a2
+  %t2 = add i16 %t1, %a3
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_add_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_add_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: addw a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i32 %a0, %a1
+  %t1 = add i32 %t0, %a2
+  %t2 = add i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_add_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_add_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i64 %a0, %a1
+  %t1 = add i64 %t0, %a2
+  %t2 = add i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i32 @test_reassoc_add_sub_i32_1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_add_sub_i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: subw a2, a2, a3
+; CHECK-NEXT: subw a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i32 %a0, %a1
+  %t1 = sub i32 %t0, %a2
+  %t2 = add i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i32 @test_reassoc_add_sub_i32_2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_add_sub_i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: subw a2, a2, a3
+; CHECK-NEXT: addw a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i32 %a0, %a1
+  %t1 = add i32 %t0, %a2
+  %t2 = sub i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i32 @test_reassoc_add_sub_i32_3(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_add_sub_i32_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: subw a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i32 %a0, %a1
+  %t1 = sub i32 %t0, %a2
+  %t2 = sub i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_add_sub_i64_1(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_add_sub_i64_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sub a2, a2, a3
+; CHECK-NEXT: sub a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i64 %a0, %a1
+  %t1 = sub i64 %t0, %a2
+  %t2 = add i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i64 @test_reassoc_add_sub_i64_2(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_add_sub_i64_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sub a2, a2, a3
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i64 %a0, %a1
+  %t1 = add i64 %t0, %a2
+  %t2 = sub i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i64 @test_reassoc_add_sub_i64_3(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_add_sub_i64_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: sub a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = add i64 %a0, %a1
+  %t1 = sub i64 %t0, %a2
+  %t2 = sub i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_and_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_and_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = and i8 %a0, %a1
+  %t1 = and i8 %t0, %a2
+  %t2 = and i8 %t1, %a3
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_and_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_and_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = and i16 %a0, %a1
+  %t1 = and i16 %t0, %a2
+  %t2 = and i16 %t1, %a3
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_and_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_and_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = and i32 %a0, %a1
+  %t1 = and i32 %t0, %a2
+  %t2 = and i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_and_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_and_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = and i64 %a0, %a1
+  %t1 = and i64 %t0, %a2
+  %t2 = and i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_or_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_or_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = or i8 %a0, %a1
+  %t1 = or i8 %t0, %a2
+  %t2 = or i8 %t1, %a3
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_or_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_or_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = or i16 %a0, %a1
+  %t1 = or i16 %t0, %a2
+  %t2 = or i16 %t1, %a3
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_or_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_or_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = or i32 %a0, %a1
+  %t1 = or i32 %t0, %a2
+  %t2 = or i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_or_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_or_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = or i64 %a0, %a1
+  %t1 = or i64 %t0, %a2
+  %t2 = or i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_xor_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_xor_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = xor i8 %a0, %a1
+  %t1 = xor i8 %t0, %a2
+  %t2 = xor i8 %t1, %a3
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_xor_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_xor_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = xor i16 %a0, %a1
+  %t1 = xor i16 %t0, %a2
+  %t2 = xor i16 %t1, %a3
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_xor_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_xor_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = xor i32 %a0, %a1
+  %t1 = xor i32 %t0, %a2
+  %t2 = xor i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_xor_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_xor_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: ret
+  %t0 = xor i64 %a0, %a1
+  %t1 = xor i64 %t0, %a2
+  %t2 = xor i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_mul_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_mul_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a1, a2, a3
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = mul i8 %a0, %a1
+  %t1 = mul i8 %t0, %a2
+  %t2 = mul i8 %t1, %a3
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_mul_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_mul_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a1, a2, a3
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = mul i16 %a0, %a1
+  %t1 = mul i16 %t0, %a2
+  %t2 = mul i16 %t1, %a3
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_mul_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_mul_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mulw a0, a0, a1
+; CHECK-NEXT: mulw a1, a2, a3
+; CHECK-NEXT: mulw a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = mul i32 %a0, %a1
+  %t1 = mul i32 %t0, %a2
+  %t2 = mul i32 %t1, %a3
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_mul_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_mul_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a1, a2, a3
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = mul i64 %a0, %a1
+  %t1 = mul i64 %t0, %a2
+  %t2 = mul i64 %t1, %a3
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_minu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_minu_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a3, a3, 255
+; CHECK-NEXT: andi a2, a2, 255
+; CHECK-NEXT: andi a1, a1, 255
+; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: minu a1, a2, a3
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i8 @llvm.umin.i8(i8 %a0, i8 %a1)
+  %t1 = call i8 @llvm.umin.i8(i8 %t0, i8 %a2)
+  %t2 = call i8 @llvm.umin.i8(i8 %t1, i8 %a3)
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_minu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_minu_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.h a3, a3
+; CHECK-NEXT: zext.h a2, a2
+; CHECK-NEXT: zext.h a1, a1
+; CHECK-NEXT: zext.h a0, a0
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: minu a1, a2, a3
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i16 @llvm.umin.i16(i16 %a0, i16 %a1)
+  %t1 = call i16 @llvm.umin.i16(i16 %t0, i16 %a2)
+  %t2 = call i16 @llvm.umin.i16(i16 %t1, i16 %a3)
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_minu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_minu_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sext.w a3, a3
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: minu a1, a2, a3
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i32 @llvm.umin.i32(i32 %a0, i32 %a1)
+  %t1 = call i32 @llvm.umin.i32(i32 %t0, i32 %a2)
+  %t2 = call i32 @llvm.umin.i32(i32 %t1, i32 %a3)
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_minu_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_minu_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: minu a1, a2, a3
+; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i64 @llvm.umin.i64(i64 %a0, i64 %a1)
+  %t1 = call i64 @llvm.umin.i64(i64 %t0, i64 %a2)
+  %t2 = call i64 @llvm.umin.i64(i64 %t1, i64 %a3)
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_min_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_min_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sext.b a3, a3
+; CHECK-NEXT: sext.b a2, a2
+; CHECK-NEXT: sext.b a1, a1
+; CHECK-NEXT: sext.b a0, a0
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: min a1, a2, a3
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i8 @llvm.smin.i8(i8 %a0, i8 %a1)
+  %t1 = call i8 @llvm.smin.i8(i8 %t0, i8 %a2)
+  %t2 = call i8 @llvm.smin.i8(i8 %t1, i8 %a3)
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_min_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_min_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sext.h a3, a3
+; CHECK-NEXT: sext.h a2, a2
+; CHECK-NEXT: sext.h a1, a1
+; CHECK-NEXT: sext.h a0, a0
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: min a1, a2, a3
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i16 @llvm.smin.i16(i16 %a0, i16 %a1)
+  %t1 = call i16 @llvm.smin.i16(i16 %t0, i16 %a2)
+  %t2 = call i16 @llvm.smin.i16(i16 %t1, i16 %a3)
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_min_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_min_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sext.w a3, a3
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: min a1, a2, a3
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i32 @llvm.smin.i32(i32 %a0, i32 %a1)
+  %t1 = call i32 @llvm.smin.i32(i32 %t0, i32 %a2)
+  %t2 = call i32 @llvm.smin.i32(i32 %t1, i32 %a3)
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_min_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_min_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: min a1, a2, a3
+; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i64 @llvm.smin.i64(i64 %a0, i64 %a1)
+  %t1 = call i64 @llvm.smin.i64(i64 %t0, i64 %a2)
+  %t2 = call i64 @llvm.smin.i64(i64 %t1, i64 %a3)
+  ret i64 %t2
+}
+
+define i8 @test_reassoc_maxu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
+; CHECK-LABEL: test_reassoc_maxu_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a3, a3, 255
+; CHECK-NEXT: andi a2, a2, 255
+; CHECK-NEXT: andi a1, a1, 255
+; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: maxu a1, a2, a3
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i8 @llvm.umax.i8(i8 %a0, i8 %a1)
+  %t1 = call i8 @llvm.umax.i8(i8 %t0, i8 %a2)
+  %t2 = call i8 @llvm.umax.i8(i8 %t1, i8 %a3)
+  ret i8 %t2
+}
+
+define i16 @test_reassoc_maxu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
+; CHECK-LABEL: test_reassoc_maxu_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.h a3, a3
+; CHECK-NEXT: zext.h a2, a2
+; CHECK-NEXT: zext.h a1, a1
+; CHECK-NEXT: zext.h a0, a0
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: maxu a1, a2, a3
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i16 @llvm.umax.i16(i16 %a0, i16 %a1)
+  %t1 = call i16 @llvm.umax.i16(i16 %t0, i16 %a2)
+  %t2 = call i16 @llvm.umax.i16(i16 %t1, i16 %a3)
+  ret i16 %t2
+}
+
+define i32 @test_reassoc_maxu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: test_reassoc_maxu_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sext.w a3, a3
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: maxu a1, a2, a3
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i32 @llvm.umax.i32(i32 %a0, i32 %a1)
+  %t1 = call i32 @llvm.umax.i32(i32 %t0, i32 %a2)
+  %t2 = call i32 @llvm.umax.i32(i32 %t1, i32 %a3)
+  ret i32 %t2
+}
+
+define i64 @test_reassoc_maxu_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
+; CHECK-LABEL: test_reassoc_maxu_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: maxu a1, a2, a3
+; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: ret
+  %t0 = call i64 @llvm.umax.i64(i64 %a0, i64 %a1)
+  %t1 = call i64 @llvm.umax.i64(i64 %t0, i64 %a2)
+  %t2 = call i64 @llvm.umax.i64(i64 %t1, i64 %a3)
%t1, i64 %a3) + ret i64 %t2 +} + +define i8 @test_reassoc_max_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) { +; CHECK-LABEL: test_reassoc_max_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.b a3, a3 +; CHECK-NEXT: sext.b a2, a2 +; CHECK-NEXT: sext.b a1, a1 +; CHECK-NEXT: sext.b a0, a0 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: max a1, a2, a3 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: ret + %t0 = call i8 @llvm.smax.i8(i8 %a0, i8 %a1) + %t1 = call i8 @llvm.smax.i8(i8 %t0, i8 %a2) + %t2 = call i8 @llvm.smax.i8(i8 %t1, i8 %a3) + ret i8 %t2 +} + +define i16 @test_reassoc_max_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) { +; CHECK-LABEL: test_reassoc_max_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.h a3, a3 +; CHECK-NEXT: sext.h a2, a2 +; CHECK-NEXT: sext.h a1, a1 +; CHECK-NEXT: sext.h a0, a0 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: max a1, a2, a3 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: ret + %t0 = call i16 @llvm.smax.i16(i16 %a0, i16 %a1) + %t1 = call i16 @llvm.smax.i16(i16 %t0, i16 %a2) + %t2 = call i16 @llvm.smax.i16(i16 %t1, i16 %a3) + ret i16 %t2 +} + +define i32 @test_reassoc_max_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { +; CHECK-LABEL: test_reassoc_max_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.w a3, a3 +; CHECK-NEXT: sext.w a2, a2 +; CHECK-NEXT: sext.w a1, a1 +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: max a1, a2, a3 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: ret + %t0 = call i32 @llvm.smax.i32(i32 %a0, i32 %a1) + %t1 = call i32 @llvm.smax.i32(i32 %t0, i32 %a2) + %t2 = call i32 @llvm.smax.i32(i32 %t1, i32 %a3) + ret i32 %t2 +} + +define i64 @test_reassoc_max_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) { +; CHECK-LABEL: test_reassoc_max_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: max a1, a2, a3 +; CHECK-NEXT: max a0, a0, a1 +; CHECK-NEXT: ret + %t0 = call i64 @llvm.smax.i64(i64 %a0, i64 %a1) + %t1 = call i64 @llvm.smax.i64(i64 %t0, i64 %a2) + %t2 = call i64 @llvm.smax.i64(i64 %t1, i64 %a3) + ret i64 %t2 +} + +declare i8 @llvm.umin.i8(i8 %a, i8 %b) +declare i16 @llvm.umin.i16(i16 %a, i16 %b) +declare i32 @llvm.umin.i32(i32 %a, i32 %b) +declare i64 @llvm.umin.i64(i64 %a, i64 %b) +declare i8 @llvm.smin.i8(i8 %a, i8 %b) +declare i16 @llvm.smin.i16(i16 %a, i16 %b) +declare i32 @llvm.smin.i32(i32 %a, i32 %b) +declare i64 @llvm.smin.i64(i64 %a, i64 %b) +declare i8 @llvm.umax.i8(i8 %a, i8 %b) +declare i16 @llvm.umax.i16(i16 %a, i16 %b) +declare i32 @llvm.umax.i32(i32 %a, i32 %b) +declare i64 @llvm.umax.i64(i64 %a, i64 %b) +declare i8 @llvm.smax.i8(i8 %a, i8 %b) +declare i16 @llvm.smax.i16(i16 %a, i16 %b) +declare i32 @llvm.smax.i32(i32 %a, i32 %b) +declare i64 @llvm.smax.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index c3985de..0743e9f 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -142,9 +142,9 @@ define i64 @mul64(i64 %a, i64 %b) nounwind { ; RV32IM: # %bb.0: ; RV32IM-NEXT: mul a3, a0, a3 ; RV32IM-NEXT: mulhu a4, a0, a2 -; RV32IM-NEXT: add a3, a4, a3 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: add a1, a4, a1 ; RV32IM-NEXT: mul a0, a0, a2 ; RV32IM-NEXT: ret ; @@ -169,8 +169,8 @@ define i64 @mul64_constant(i64 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 30 ; RV32I-NEXT: slli a4, a1, 2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: add a1, a0, a3 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; @@ -179,8 +179,8 @@ define i64 @mul64_constant(i64 %a) 
nounwind { ; RV32IM-NEXT: li a2, 5 ; RV32IM-NEXT: mulhu a2, a0, a2 ; RV32IM-NEXT: slli a3, a1, 2 -; RV32IM-NEXT: add a1, a3, a1 ; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: add a1, a1, a3 ; RV32IM-NEXT: slli a2, a0, 2 ; RV32IM-NEXT: add a0, a2, a0 ; RV32IM-NEXT: ret @@ -256,8 +256,8 @@ define i32 @mulhs_positive_constant(i32 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 30 ; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: mulhs_positive_constant: @@ -298,11 +298,11 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 30 ; RV32I-NEXT: slli a4, a1, 2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: add a0, a0, a2 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: mulhs_negative_constant: @@ -315,8 +315,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; @@ -324,8 +324,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { ; RV64IM: # %bb.0: ; RV64IM-NEXT: sext.w a0, a0 ; RV64IM-NEXT: slli a1, a0, 2 -; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: neg a0, a0 +; RV64IM-NEXT: sub a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: ret %1 = sext i32 %a to i64 @@ -530,8 +530,8 @@ define i64 @muli64_p65(i64 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 26 ; RV32I-NEXT: slli a4, a1, 6 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: add a1, a0, a3 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; @@ -540,8 +540,8 @@ define i64 @muli64_p65(i64 %a) nounwind { ; RV32IM-NEXT: li a2, 65 ; RV32IM-NEXT: mulhu a2, a0, a2 ; RV32IM-NEXT: slli a3, a1, 6 -; RV32IM-NEXT: add a1, a3, a1 ; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: add a1, a1, a3 ; RV32IM-NEXT: slli a2, a0, 6 ; RV32IM-NEXT: add a0, a2, a0 ; RV32IM-NEXT: ret @@ -569,8 +569,8 @@ define i64 @muli64_p63(i64 %a) nounwind { ; RV32I-NEXT: srli a4, a0, 26 ; RV32I-NEXT: slli a5, a1, 6 ; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: sub a1, a4, a1 -; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; @@ -579,8 +579,8 @@ define i64 @muli64_p63(i64 %a) nounwind { ; RV32IM-NEXT: li a2, 63 ; RV32IM-NEXT: mulhu a2, a0, a2 ; RV32IM-NEXT: slli a3, a1, 6 -; RV32IM-NEXT: sub a1, a3, a1 -; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: sub a1, a2, a1 +; RV32IM-NEXT: add a1, a1, a3 ; RV32IM-NEXT: slli a2, a0, 6 ; RV32IM-NEXT: sub a0, a2, a0 ; RV32IM-NEXT: ret @@ -632,29 +632,29 @@ define i32 @muli32_m65(i32 %a) nounwind { ; RV32I-LABEL: muli32_m65: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 6 -; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_m65: ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a1, a0, 6 -; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: neg a0, a0 +; RV32IM-NEXT: sub a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_m65: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 6 -; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: subw a0, a0, a1 ; 
RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_m65: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 6 -; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: negw a0, a0 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, -65 ret i32 %1 @@ -668,7 +668,7 @@ define i64 @muli64_m63(i64 %a) nounwind { ; RV32I-NEXT: srli a4, a0, 26 ; RV32I-NEXT: slli a5, a1, 6 ; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: add a3, a4, a3 ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret @@ -679,8 +679,8 @@ define i64 @muli64_m63(i64 %a) nounwind { ; RV32IM-NEXT: sub a1, a1, a2 ; RV32IM-NEXT: li a2, -63 ; RV32IM-NEXT: mulhu a2, a0, a2 -; RV32IM-NEXT: sub a2, a2, a0 -; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: sub a1, a0, a1 +; RV32IM-NEXT: sub a1, a2, a1 ; RV32IM-NEXT: slli a2, a0, 6 ; RV32IM-NEXT: sub a0, a0, a2 ; RV32IM-NEXT: ret @@ -709,11 +709,11 @@ define i64 @muli64_m65(i64 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 26 ; RV32I-NEXT: slli a4, a1, 6 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: add a0, a0, a2 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a0 ; RV32I-NEXT: neg a0, a3 ; RV32I-NEXT: ret ; @@ -723,25 +723,25 @@ define i64 @muli64_m65(i64 %a) nounwind { ; RV32IM-NEXT: add a1, a2, a1 ; RV32IM-NEXT: li a2, -65 ; RV32IM-NEXT: mulhu a2, a0, a2 -; RV32IM-NEXT: sub a2, a2, a0 +; RV32IM-NEXT: add a1, a0, a1 ; RV32IM-NEXT: sub a1, a2, a1 ; RV32IM-NEXT: slli a2, a0, 6 -; RV32IM-NEXT: add a0, a2, a0 ; RV32IM-NEXT: neg a0, a0 +; RV32IM-NEXT: sub a0, a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_m65: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 6 -; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_m65: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 6 -; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: neg a0, a0 +; RV64IM-NEXT: sub a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, -65 ret i64 %1 @@ -949,11 +949,11 @@ define i64 @muli64_p4352(i64 %a) nounwind { ; RV32I-NEXT: srli a3, a0, 20 ; RV32I-NEXT: slli a1, a1, 12 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: add a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: slli a3, a0, 12 -; RV32I-NEXT: add a0, a3, a2 -; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: slli a4, a0, 12 +; RV32I-NEXT: add a0, a4, a3 +; RV32I-NEXT: sltu a3, a0, a4 +; RV32I-NEXT: add a2, a2, a3 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; @@ -993,12 +993,12 @@ define i64 @muli64_p3840(i64 %a) nounwind { ; RV32I-NEXT: srli a3, a0, 20 ; RV32I-NEXT: slli a1, a1, 12 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a3, a0, 8 ; RV32I-NEXT: slli a0, a0, 12 -; RV32I-NEXT: sltu a3, a0, a2 -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a3 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_p3840: @@ -1047,8 +1047,8 @@ define i64 @muli64_m4352(i64 %a) nounwind { ; RV32IM-NEXT: slli a2, a2, 8 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: sub a3, a3, a0 -; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: sub a1, a0, a1 +; RV32IM-NEXT: sub a1, a3, a1 ; RV32IM-NEXT: mul a0, a0, a2 ; RV32IM-NEXT: ret ; @@ -1077,12 +1077,12 @@ define i64 
@muli64_m3840(i64 %a) nounwind { ; RV32I-NEXT: srli a3, a0, 24 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 12 +; RV32I-NEXT: slli a3, a0, 12 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: sltu a3, a0, a2 -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a3 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_m3840: @@ -1091,8 +1091,8 @@ define i64 @muli64_m3840(i64 %a) nounwind { ; RV32IM-NEXT: slli a2, a2, 8 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: sub a3, a3, a0 -; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: sub a1, a0, a1 +; RV32IM-NEXT: sub a1, a3, a1 ; RV32IM-NEXT: mul a0, a0, a2 ; RV32IM-NEXT: ret ; @@ -1126,14 +1126,14 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-NEXT: srli a2, a4, 24 ; RV32I-NEXT: slli a7, a3, 8 ; RV32I-NEXT: or a2, a7, a2 -; RV32I-NEXT: sltu t0, a2, a1 -; RV32I-NEXT: srli a7, a3, 20 +; RV32I-NEXT: sltu a7, a2, a1 +; RV32I-NEXT: srli t0, a3, 20 ; RV32I-NEXT: slli t1, a5, 12 -; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: or t0, t1, t0 ; RV32I-NEXT: srli a3, a3, 24 ; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: sub t1, a3, a7 +; RV32I-NEXT: or t1, a5, a3 +; RV32I-NEXT: add t0, t0, a7 ; RV32I-NEXT: srli a3, a6, 20 ; RV32I-NEXT: slli a5, a4, 12 ; RV32I-NEXT: or a3, a5, a3 @@ -1195,24 +1195,24 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32IM-NEXT: sltu t4, t4, s1 ; RV32IM-NEXT: sltu a7, t1, a7 ; RV32IM-NEXT: mulhu t1, a1, t2 -; RV32IM-NEXT: add a7, t1, a7 ; RV32IM-NEXT: add a7, a7, t4 ; RV32IM-NEXT: sltu t0, t5, t0 ; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: mulhu t1, a3, a5 -; RV32IM-NEXT: sub a3, t1, a3 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: sub a3, t3, a4 -; RV32IM-NEXT: sub a3, a3, a1 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: add a2, a2, t0 -; RV32IM-NEXT: add a2, a7, a2 -; RV32IM-NEXT: add a2, a2, s0 -; RV32IM-NEXT: mul a1, a4, a5 -; RV32IM-NEXT: sw a1, 0(a0) +; RV32IM-NEXT: mulhu t2, a3, a5 +; RV32IM-NEXT: sub a3, a3, a2 +; RV32IM-NEXT: sub a2, t2, a3 +; RV32IM-NEXT: add a1, a4, a1 +; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: sub a1, a1, t0 +; RV32IM-NEXT: sub a1, t3, a1 +; RV32IM-NEXT: add a1, a7, a1 +; RV32IM-NEXT: add a1, a1, s0 +; RV32IM-NEXT: add a1, t1, a1 +; RV32IM-NEXT: mul a2, a4, a5 +; RV32IM-NEXT: sw a2, 0(a0) ; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) -; RV32IM-NEXT: sw a2, 12(a0) +; RV32IM-NEXT: sw a1, 12(a0) ; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 16 @@ -1226,12 +1226,12 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV64I-NEXT: srli a3, a0, 56 ; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: sub a1, a1, a2 -; RV64I-NEXT: slli a2, a0, 12 +; RV64I-NEXT: slli a3, a0, 12 ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: sltu a3, a0, a2 -; RV64I-NEXT: sub a1, a1, a3 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: sltu a4, a0, a3 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a3 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli128_m3840: @@ -1240,8 +1240,8 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV64IM-NEXT: slli a2, a2, 8 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: mulhu a3, a0, a2 -; RV64IM-NEXT: sub a3, a3, a0 -; RV64IM-NEXT: add a1, a3, a1 +; RV64IM-NEXT: sub a1, a0, a1 +; RV64IM-NEXT: sub a1, a3, a1 ; 
RV64IM-NEXT: mul a0, a0, a2 ; RV64IM-NEXT: ret %1 = mul i128 %a, -3840 @@ -1275,8 +1275,8 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-NEXT: slli t4, a5, 6 ; RV32I-NEXT: or a7, t4, a7 ; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: add t1, t1, t3 ; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a5, a5, t3 ; RV32I-NEXT: sub a7, t2, t0 ; RV32I-NEXT: sub a3, a3, a6 ; RV32I-NEXT: sub a3, a3, a4 @@ -1314,29 +1314,29 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32IM-NEXT: add t1, a7, t1 ; RV32IM-NEXT: sub t4, t1, a4 ; RV32IM-NEXT: slli t5, a1, 6 -; RV32IM-NEXT: sub t5, t5, a1 -; RV32IM-NEXT: add t5, t5, a3 +; RV32IM-NEXT: sub t6, a1, a3 +; RV32IM-NEXT: sub t5, t5, t6 ; RV32IM-NEXT: sub t6, t4, t5 ; RV32IM-NEXT: sltu s0, t6, t4 ; RV32IM-NEXT: neg s1, a4 ; RV32IM-NEXT: sltu t4, t4, s1 ; RV32IM-NEXT: sltu a7, t1, a7 ; RV32IM-NEXT: mulhu t1, a4, t2 -; RV32IM-NEXT: add a7, t1, a7 ; RV32IM-NEXT: add a7, a7, t4 -; RV32IM-NEXT: slli t1, a2, 6 -; RV32IM-NEXT: sub a2, a2, t1 +; RV32IM-NEXT: slli t2, a2, 6 +; RV32IM-NEXT: sub a2, a2, t2 ; RV32IM-NEXT: mulhu a5, a1, a5 +; RV32IM-NEXT: sub a1, a1, a2 ; RV32IM-NEXT: sub a5, a5, a1 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: sub a1, t3, a3 -; RV32IM-NEXT: sub a1, a1, a4 -; RV32IM-NEXT: add a1, a1, a2 -; RV32IM-NEXT: neg a2, t5 -; RV32IM-NEXT: sltu a2, a2, t0 -; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: add a4, a3, a4 +; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: neg a1, t5 +; RV32IM-NEXT: sltu a1, a1, t0 +; RV32IM-NEXT: sub a4, a4, a1 +; RV32IM-NEXT: sub a1, t3, a4 ; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 +; RV32IM-NEXT: add a1, t1, a1 ; RV32IM-NEXT: slli a2, a3, 6 ; RV32IM-NEXT: sub a3, a3, a2 ; RV32IM-NEXT: sw a3, 0(a0) @@ -1355,7 +1355,7 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV64I-NEXT: srli a4, a0, 58 ; RV64I-NEXT: slli a5, a1, 6 ; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: add a3, a4, a3 ; RV64I-NEXT: sub a1, a1, a3 ; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret @@ -1366,8 +1366,8 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV64IM-NEXT: sub a1, a1, a2 ; RV64IM-NEXT: li a2, -63 ; RV64IM-NEXT: mulhu a2, a0, a2 -; RV64IM-NEXT: sub a2, a2, a0 -; RV64IM-NEXT: add a1, a2, a1 +; RV64IM-NEXT: sub a1, a0, a1 +; RV64IM-NEXT: sub a1, a2, a1 ; RV64IM-NEXT: slli a2, a0, 6 ; RV64IM-NEXT: sub a0, a0, a2 ; RV64IM-NEXT: ret @@ -1441,13 +1441,13 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: sltu a3, a2, s9 ; RV32I-NEXT: sltu a4, s9, s5 ; RV32I-NEXT: sltu a5, s8, s7 -; RV32I-NEXT: add a5, s6, a5 ; RV32I-NEXT: add a4, a5, a4 -; RV32I-NEXT: add a1, a1, s3 ; RV32I-NEXT: sltu a0, s2, a0 +; RV32I-NEXT: add a0, s3, a0 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: add a0, a4, a0 -; RV32I-NEXT: add a1, a0, a3 +; RV32I-NEXT: add a0, a0, a3 +; RV32I-NEXT: add a1, s6, a0 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -1486,14 +1486,14 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; RV32IM-NEXT: sltu a7, t0, a7 ; RV32IM-NEXT: sltu a5, a5, a6 ; RV32IM-NEXT: mulhu a3, a1, a3 -; RV32IM-NEXT: add a3, a3, a5 -; RV32IM-NEXT: add a3, a3, a7 +; RV32IM-NEXT: add a5, a5, a7 ; RV32IM-NEXT: mul a1, a4, a1 ; RV32IM-NEXT: mulhu a0, a4, a0 +; RV32IM-NEXT: add a1, a1, t1 ; RV32IM-NEXT: add a0, a0, a1 -; RV32IM-NEXT: add a0, a0, t1 -; RV32IM-NEXT: add a0, a3, a0 -; RV32IM-NEXT: add a1, a0, t2 +; RV32IM-NEXT: add a0, a5, a0 +; RV32IM-NEXT: add a0, a0, t2 +; RV32IM-NEXT: add a1, a3, a0 ; RV32IM-NEXT: mv a0, a2 ; 
RV32IM-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index 7466fe5..0333786 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -83,8 +83,8 @@ define i64 @neg_abs64(i64 %x) { ; RV32I-NEXT: xor a0, a0, a2 ; RV32I-NEXT: sltu a3, a2, a0 ; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: sub a1, a2, a1 -; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; @@ -94,8 +94,8 @@ define i64 @neg_abs64(i64 %x) { ; RV32ZBB-NEXT: xor a0, a0, a2 ; RV32ZBB-NEXT: sltu a3, a2, a0 ; RV32ZBB-NEXT: xor a1, a1, a2 +; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: sub a1, a2, a1 -; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: sub a0, a2, a0 ; RV32ZBB-NEXT: ret ; @@ -123,8 +123,8 @@ define i64 @select_neg_abs64(i64 %x) { ; RV32I-NEXT: xor a0, a0, a2 ; RV32I-NEXT: sltu a3, a2, a0 ; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: sub a1, a2, a1 -; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; @@ -134,8 +134,8 @@ define i64 @select_neg_abs64(i64 %x) { ; RV32ZBB-NEXT: xor a0, a0, a2 ; RV32ZBB-NEXT: sltu a3, a2, a0 ; RV32ZBB-NEXT: xor a1, a1, a2 +; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: sub a1, a2, a1 -; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: sub a0, a2, a0 ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 5dc6bf8..7eb00a7 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -774,7 +774,7 @@ define i32 @bswap_i32(i32 %a) nounwind { ; RV32I-NEXT: and a2, a0, a2 ; RV32I-NEXT: slli a2, a2, 8 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; @@ -800,7 +800,7 @@ define i64 @bswap_i64(i64 %a) { ; RV32I-NEXT: and a4, a1, a3 ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a2, a4, a2 ; RV32I-NEXT: or a2, a1, a2 ; RV32I-NEXT: srli a1, a0, 8 ; RV32I-NEXT: and a1, a1, a3 @@ -809,7 +809,7 @@ define i64 @bswap_i64(i64 %a) { ; RV32I-NEXT: and a3, a0, a3 ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 22ea150..51417f8 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -957,7 +957,7 @@ define signext i32 @bswap_i32(i32 signext %a) nounwind { ; RV64I-NEXT: and a2, a0, a2 ; RV64I-NEXT: slli a2, a2, 8 ; RV64I-NEXT: slliw a0, a0, 24 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -983,7 +983,7 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind { ; RV64I-NEXT: and a3, a0, a3 ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a2, a3, a2 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: sw a0, 0(a1) ; RV64I-NEXT: ret @@ -1016,8 +1016,8 @@ define i64 @bswap_i64(i64 %a) { ; RV64I-NEXT: srli a5, a0, 8 ; RV64I-NEXT: srliw a5, a5, 24 ; RV64I-NEXT: slli a5, a5, 24 -; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: or a1, a5, a1 ; RV64I-NEXT: and a4, a0, a4 ; RV64I-NEXT: slli a4, a4, 24 ; RV64I-NEXT: srliw a3, a0, 24 @@ -1026,8 +1026,8 @@ define i64 @bswap_i64(i64 %a) { ; RV64I-NEXT: and a2, a0, a2 ; 
RV64I-NEXT: slli a2, a2, 40 ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll index 8cad9c2..d3c3765 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -30,20 +30,20 @@ define void @add_v2i64(ptr %x, ptr %y) { ; RV32-NEXT: lw a3, 12(a0) ; RV32-NEXT: lw a4, 0(a0) ; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: lw a6, 4(a1) -; RV32-NEXT: lw a7, 0(a1) -; RV32-NEXT: lw t0, 8(a1) +; RV32-NEXT: lw a6, 0(a1) +; RV32-NEXT: lw a7, 8(a1) +; RV32-NEXT: lw t0, 4(a1) ; RV32-NEXT: lw a1, 12(a1) -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a7, a4, a7 -; RV32-NEXT: sltu a4, a7, a4 +; RV32-NEXT: add a6, a4, a6 +; RV32-NEXT: sltu a4, a6, a4 +; RV32-NEXT: add a4, t0, a4 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add t0, a2, t0 -; RV32-NEXT: sltu a2, t0, a2 +; RV32-NEXT: add a7, a2, a7 +; RV32-NEXT: sltu a2, a7, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: sw t0, 8(a0) -; RV32-NEXT: sw a7, 0(a0) +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: sw a7, 8(a0) +; RV32-NEXT: sw a6, 0(a0) ; RV32-NEXT: sw a1, 12(a0) ; RV32-NEXT: sw a4, 4(a0) ; RV32-NEXT: ret @@ -88,15 +88,15 @@ define void @add_v1i64(ptr %x, ptr %y) { ; RV32-LABEL: add_v1i64: ; RV32: # %bb.0: ; RV32-NEXT: lw a2, 0(a0) -; RV32-NEXT: lw a3, 4(a0) -; RV32-NEXT: lw a4, 4(a1) -; RV32-NEXT: lw a1, 0(a1) -; RV32-NEXT: add a3, a3, a4 -; RV32-NEXT: add a1, a2, a1 -; RV32-NEXT: sltu a2, a1, a2 -; RV32-NEXT: add a2, a3, a2 -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: sw a2, 4(a0) +; RV32-NEXT: lw a3, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: lw a4, 4(a0) +; RV32-NEXT: add a3, a2, a3 +; RV32-NEXT: sltu a2, a3, a2 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, a4, a1 +; RV32-NEXT: sw a3, 0(a0) +; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index b1aaffc..5823232 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -425,8 +425,8 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV32-NEXT: or a3, a3, a4 ; RV32-NEXT: slli a5, a5, 16 ; RV32-NEXT: slli a6, a6, 24 -; RV32-NEXT: or a4, a6, a5 -; RV32-NEXT: or a3, a4, a3 +; RV32-NEXT: or a3, a5, a3 +; RV32-NEXT: or a3, a6, a3 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: andi a2, a2, 2 @@ -446,7 +446,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV32-NEXT: or a2, a2, a3 ; RV32-NEXT: slli a4, a4, 16 ; RV32-NEXT: slli a0, a0, 24 -; RV32-NEXT: or a0, a0, a4 +; RV32-NEXT: or a2, a4, a2 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: vmv.s.x v9, a0 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma @@ -473,8 +473,8 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV64-NEXT: or a3, a3, a4 ; RV64-NEXT: slli a5, a5, 16 ; RV64-NEXT: slli a6, a6, 24 -; RV64-NEXT: or a4, a6, a5 -; RV64-NEXT: or a3, a4, a3 +; RV64-NEXT: or a3, a5, a3 +; RV64-NEXT: or a3, a6, a3 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vmv.v.x v8, a3 ; RV64-NEXT: andi a2, a2, 2 @@ -494,7 +494,7 @@ define void 
@masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV64-NEXT: or a2, a2, a3 ; RV64-NEXT: slli a4, a4, 16 ; RV64-NEXT: slli a0, a0, 24 -; RV64-NEXT: or a0, a0, a4 +; RV64-NEXT: or a2, a4, a2 ; RV64-NEXT: or a0, a0, a2 ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll index 3d7668b..042084d 100644 --- a/llvm/test/CodeGen/RISCV/sadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll @@ -59,10 +59,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: mv a4, a1 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: add a5, a4, a3 ; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a1, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a4, a1 ; RV32I-NEXT: xor a2, a4, a1 ; RV32I-NEXT: xor a3, a4, a3 ; RV32I-NEXT: not a3, a3 @@ -94,10 +94,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32IZbb: # %bb.0: ; RV32IZbb-NEXT: mv a4, a1 ; RV32IZbb-NEXT: mv a1, a0 -; RV32IZbb-NEXT: add a5, a4, a3 ; RV32IZbb-NEXT: add a0, a0, a2 ; RV32IZbb-NEXT: sltu a1, a0, a1 -; RV32IZbb-NEXT: add a1, a5, a1 +; RV32IZbb-NEXT: add a1, a3, a1 +; RV32IZbb-NEXT: add a1, a4, a1 ; RV32IZbb-NEXT: xor a2, a4, a1 ; RV32IZbb-NEXT: xor a3, a4, a3 ; RV32IZbb-NEXT: andn a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll index aa94960..3e2ccba 100644 --- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll @@ -65,10 +65,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: add a3, a2, a5 ; RV32I-NEXT: add a0, a0, a4 ; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a5, a1 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: xor a3, a2, a1 ; RV32I-NEXT: xor a2, a2, a5 ; RV32I-NEXT: not a2, a2 @@ -100,10 +100,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32IZbb: # %bb.0: ; RV32IZbb-NEXT: mv a2, a1 ; RV32IZbb-NEXT: mv a1, a0 -; RV32IZbb-NEXT: add a3, a2, a5 ; RV32IZbb-NEXT: add a0, a0, a4 ; RV32IZbb-NEXT: sltu a1, a0, a1 -; RV32IZbb-NEXT: add a1, a3, a1 +; RV32IZbb-NEXT: add a1, a5, a1 +; RV32IZbb-NEXT: add a1, a2, a1 ; RV32IZbb-NEXT: xor a3, a2, a1 ; RV32IZbb-NEXT: xor a2, a2, a5 ; RV32IZbb-NEXT: andn a2, a3, a2 diff --git a/llvm/test/CodeGen/RISCV/select-binop-identity.ll b/llvm/test/CodeGen/RISCV/select-binop-identity.ll index 3d3cdd7..00ead4d 100644 --- a/llvm/test/CodeGen/RISCV/select-binop-identity.ll +++ b/llvm/test/CodeGen/RISCV/select-binop-identity.ll @@ -272,7 +272,7 @@ define i64 @add_select_all_zeros_i64(i1 zeroext %c, i64 %x, i64 %y) { ; RV32I-NEXT: and a1, a0, a1 ; RV32I-NEXT: add a0, a1, a3 ; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: add a1, a4, a1 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: ret ; @@ -343,7 +343,7 @@ define i64 @sub_select_all_zeros_i64(i1 zeroext %c, i64 %x, i64 %y) { ; RV32I-NEXT: and a2, a0, a2 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: sltu a1, a3, a0 -; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sub a1, a4, a1 ; RV32I-NEXT: sub a0, a3, a0 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/shadowcallstack.ll b/llvm/test/CodeGen/RISCV/shadowcallstack.ll index 51df390..9a4766d 100644 --- a/llvm/test/CodeGen/RISCV/shadowcallstack.ll +++ b/llvm/test/CodeGen/RISCV/shadowcallstack.ll @@ -88,8 +88,8 @@ define i32 @f4() shadowcallstack { ; 
RV32-NEXT: call bar@plt ; RV32-NEXT: mv s3, a0 ; RV32-NEXT: call bar@plt -; RV32-NEXT: add s0, s0, s1 ; RV32-NEXT: add a0, s3, a0 +; RV32-NEXT: add a0, s1, a0 ; RV32-NEXT: add a0, s0, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -121,8 +121,8 @@ define i32 @f4() shadowcallstack { ; RV64-NEXT: call bar@plt ; RV64-NEXT: mv s3, a0 ; RV64-NEXT: call bar@plt -; RV64-NEXT: add s0, s0, s1 ; RV64-NEXT: add a0, s3, a0 +; RV64-NEXT: add a0, s1, a0 ; RV64-NEXT: addw a0, s0, a0 ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll index 4f4f808a..c96c57d 100644 --- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll @@ -21,11 +21,11 @@ define iXLen2 @test_udiv_3(iXLen2 %x) nounwind { ; RV32-NEXT: addi a3, a3, -1366 ; RV32-NEXT: mul a3, a5, a3 ; RV32-NEXT: mulhu a6, a5, a4 -; RV32-NEXT: add a3, a6, a3 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a5, a4 ; RV32-NEXT: ret ; @@ -46,11 +46,11 @@ define iXLen2 @test_udiv_3(iXLen2 %x) nounwind { ; RV64-NEXT: sub a4, a0, a3 ; RV64-NEXT: mul a5, a4, a6 ; RV64-NEXT: mulhu a6, a4, a2 -; RV64-NEXT: add a5, a6, a5 ; RV64-NEXT: sltu a0, a0, a3 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a1, a2 +; RV64-NEXT: add a0, a5, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a4, a2 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 3 @@ -74,11 +74,11 @@ define iXLen2 @test_udiv_5(iXLen2 %x) nounwind { ; RV32-NEXT: addi a3, a3, -820 ; RV32-NEXT: mul a3, a5, a3 ; RV32-NEXT: mulhu a6, a5, a4 -; RV32-NEXT: add a3, a6, a3 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a5, a4 ; RV32-NEXT: ret ; @@ -99,11 +99,11 @@ define iXLen2 @test_udiv_5(iXLen2 %x) nounwind { ; RV64-NEXT: sub a4, a0, a3 ; RV64-NEXT: mul a5, a4, a6 ; RV64-NEXT: mulhu a6, a4, a2 -; RV64-NEXT: add a5, a6, a5 ; RV64-NEXT: sltu a0, a0, a3 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a1, a2 +; RV64-NEXT: add a0, a5, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a4, a2 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 5 @@ -181,11 +181,11 @@ define iXLen2 @test_udiv_15(iXLen2 %x) nounwind { ; RV32-NEXT: mul a5, a3, a5 ; RV32-NEXT: addi a4, a4, -273 ; RV32-NEXT: mulhu a6, a3, a4 -; RV32-NEXT: add a5, a6, a5 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a5, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a3, a4 ; RV32-NEXT: ret ; @@ -208,11 +208,11 @@ define iXLen2 @test_udiv_15(iXLen2 %x) nounwind { ; RV64-NEXT: sub a3, a0, a2 ; RV64-NEXT: mul a4, a3, a4 ; RV64-NEXT: mulhu a6, a3, a5 -; RV64-NEXT: add a4, a6, a4 ; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a5 -; RV64-NEXT: add a1, a4, a1 +; RV64-NEXT: mul a0, a1, a5 +; RV64-NEXT: add a0, a4, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a3, a5 ; RV64-NEXT: ret %a = udiv 
iXLen2 %x, 15 @@ -236,11 +236,11 @@ define iXLen2 @test_udiv_17(iXLen2 %x) nounwind { ; RV32-NEXT: addi a3, a3, 240 ; RV32-NEXT: mul a3, a5, a3 ; RV32-NEXT: mulhu a6, a5, a4 -; RV32-NEXT: add a3, a6, a3 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a5, a4 ; RV32-NEXT: ret ; @@ -261,11 +261,11 @@ define iXLen2 @test_udiv_17(iXLen2 %x) nounwind { ; RV64-NEXT: sub a4, a0, a3 ; RV64-NEXT: mul a5, a4, a6 ; RV64-NEXT: mulhu a6, a4, a2 -; RV64-NEXT: add a5, a6, a5 ; RV64-NEXT: sltu a0, a0, a3 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a1, a2 +; RV64-NEXT: add a0, a5, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a4, a2 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 17 @@ -291,11 +291,11 @@ define iXLen2 @test_udiv_255(iXLen2 %x) nounwind { ; RV32-NEXT: mul a5, a3, a5 ; RV32-NEXT: addi a4, a4, -257 ; RV32-NEXT: mulhu a6, a3, a4 -; RV32-NEXT: add a5, a6, a5 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a5, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a3, a4 ; RV32-NEXT: ret ; @@ -318,11 +318,11 @@ define iXLen2 @test_udiv_255(iXLen2 %x) nounwind { ; RV64-NEXT: sub a3, a0, a2 ; RV64-NEXT: mul a4, a3, a4 ; RV64-NEXT: mulhu a6, a3, a5 -; RV64-NEXT: add a4, a6, a4 ; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a5 -; RV64-NEXT: add a1, a4, a1 +; RV64-NEXT: mul a0, a1, a5 +; RV64-NEXT: add a0, a4, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a3, a5 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 255 @@ -346,11 +346,11 @@ define iXLen2 @test_udiv_257(iXLen2 %x) nounwind { ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: mul a3, a5, a3 ; RV32-NEXT: mulhu a6, a5, a4 -; RV32-NEXT: add a3, a6, a3 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a5, a4 ; RV32-NEXT: ret ; @@ -371,11 +371,11 @@ define iXLen2 @test_udiv_257(iXLen2 %x) nounwind { ; RV64-NEXT: sub a4, a0, a3 ; RV64-NEXT: mul a5, a4, a6 ; RV64-NEXT: mulhu a6, a4, a2 -; RV64-NEXT: add a5, a6, a5 ; RV64-NEXT: sltu a0, a0, a3 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a1, a2 +; RV64-NEXT: add a0, a5, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a4, a2 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 257 @@ -401,15 +401,15 @@ define iXLen2 @test_udiv_65535(iXLen2 %x) nounwind { ; RV32-NEXT: mul a5, a3, a5 ; RV32-NEXT: addi a4, a4, -1 ; RV32-NEXT: mulhu a4, a3, a4 -; RV32-NEXT: add a4, a4, a5 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 ; RV32-NEXT: slli a0, a1, 16 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub a1, a4, a0 +; RV32-NEXT: sub a1, a5, a0 +; RV32-NEXT: add a1, a4, a1 ; RV32-NEXT: slli a0, a3, 16 -; RV32-NEXT: add a0, a0, a3 -; RV32-NEXT: neg a0, a0 +; RV32-NEXT: neg a2, a3 +; RV32-NEXT: sub a0, a2, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_udiv_65535: @@ -433,11 +433,11 @@ define iXLen2 @test_udiv_65535(iXLen2 %x) nounwind { ; RV64-NEXT: mul a5, a3, a5 ; RV64-NEXT: addi a4, a4, -1 ; RV64-NEXT: mulhu a6, a3, a4 -; RV64-NEXT: add a5, a6, a5 ; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul 
a1, a1, a4 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a1, a4 +; RV64-NEXT: add a0, a5, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a3, a4 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 65535 @@ -460,12 +460,12 @@ define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind { ; RV32-NEXT: sub a3, a0, a2 ; RV32-NEXT: mulhu a4, a3, a4 ; RV32-NEXT: slli a5, a3, 16 -; RV32-NEXT: sub a4, a4, a5 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 ; RV32-NEXT: slli a0, a1, 16 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: add a1, a4, a1 +; RV32-NEXT: sub a0, a5, a1 +; RV32-NEXT: sub a1, a4, a0 ; RV32-NEXT: sub a0, a3, a5 ; RV32-NEXT: ret ; @@ -488,11 +488,11 @@ define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind { ; RV64-NEXT: sub a5, a0, a2 ; RV64-NEXT: mul a3, a5, a3 ; RV64-NEXT: mulhu a6, a5, a4 -; RV64-NEXT: add a3, a6, a3 ; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a4 -; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: mul a0, a1, a4 +; RV64-NEXT: add a0, a3, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a5, a4 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 65537 @@ -520,11 +520,11 @@ define iXLen2 @test_udiv_12(iXLen2 %x) nounwind { ; RV32-NEXT: addi a3, a3, -1366 ; RV32-NEXT: mul a3, a5, a3 ; RV32-NEXT: mulhu a6, a5, a4 -; RV32-NEXT: add a3, a6, a3 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: mul a0, a1, a4 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a1, a6, a0 ; RV32-NEXT: mul a0, a5, a4 ; RV32-NEXT: ret ; @@ -549,11 +549,11 @@ define iXLen2 @test_udiv_12(iXLen2 %x) nounwind { ; RV64-NEXT: sub a4, a0, a3 ; RV64-NEXT: mul a5, a4, a6 ; RV64-NEXT: mulhu a6, a4, a2 -; RV64-NEXT: add a5, a6, a5 ; RV64-NEXT: sltu a0, a0, a3 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a1, a2 +; RV64-NEXT: add a0, a5, a0 +; RV64-NEXT: add a1, a6, a0 ; RV64-NEXT: mul a0, a4, a2 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 12 diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll index a71d81f..38e75ac 100644 --- a/llvm/test/CodeGen/RISCV/srem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -240,8 +240,8 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind { ; RV32IM-NEXT: add a1, a1, a2 ; RV32IM-NEXT: li a2, 95 ; RV32IM-NEXT: mul a2, a1, a2 +; RV32IM-NEXT: sub a2, a2, a1 ; RV32IM-NEXT: sub a0, a0, a2 -; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_srem_sdiv: @@ -278,8 +278,8 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind { ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: li a2, 95 ; RV64IM-NEXT: mulw a2, a1, a2 +; RV64IM-NEXT: subw a2, a2, a1 ; RV64IM-NEXT: subw a0, a0, a2 -; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, 95 %2 = sdiv i32 %x, 95 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 226b159..b41d819 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -365,9 +365,9 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32-NEXT: srli a2, a1, 31 ; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: or a1, a2, a1 ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: or a0, a2, a0 ; RV32-NEXT: sw a0, 8(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -515,9 +515,9 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32M-NEXT: srli a2, 
a1, 31 ; RV32M-NEXT: andi a1, a1, 1 ; RV32M-NEXT: slli a1, a1, 1 -; RV32M-NEXT: or a1, a2, a1 ; RV32M-NEXT: slli a0, a0, 2 ; RV32M-NEXT: or a0, a1, a0 +; RV32M-NEXT: or a0, a2, a0 ; RV32M-NEXT: sw a0, 8(s0) ; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -540,28 +540,28 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64M-NEXT: or a3, a5, a3 ; RV64M-NEXT: srai a3, a3, 31 ; RV64M-NEXT: slli a4, a4, 32 -; RV64M-NEXT: lui a5, %hi(.LCPI3_0) -; RV64M-NEXT: ld a5, %lo(.LCPI3_0)(a5) ; RV64M-NEXT: or a2, a2, a4 ; RV64M-NEXT: slli a2, a2, 29 +; RV64M-NEXT: lui a4, %hi(.LCPI3_0) +; RV64M-NEXT: ld a4, %lo(.LCPI3_0)(a4) ; RV64M-NEXT: srai a2, a2, 31 -; RV64M-NEXT: mulh a4, a2, a5 +; RV64M-NEXT: slli a1, a1, 31 +; RV64M-NEXT: srai a1, a1, 31 +; RV64M-NEXT: mulh a4, a2, a4 ; RV64M-NEXT: srli a5, a4, 63 ; RV64M-NEXT: srai a4, a4, 1 ; RV64M-NEXT: add a4, a4, a5 -; RV64M-NEXT: slli a5, a4, 2 -; RV64M-NEXT: add a4, a5, a4 ; RV64M-NEXT: lui a5, %hi(.LCPI3_1) ; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64M-NEXT: slli a1, a1, 31 -; RV64M-NEXT: srai a1, a1, 31 +; RV64M-NEXT: add a2, a2, a4 +; RV64M-NEXT: slli a4, a4, 2 ; RV64M-NEXT: add a2, a2, a4 ; RV64M-NEXT: mulh a4, a3, a5 ; RV64M-NEXT: srli a5, a4, 63 ; RV64M-NEXT: srai a4, a4, 1 ; RV64M-NEXT: add a4, a4, a5 ; RV64M-NEXT: slli a5, a4, 3 -; RV64M-NEXT: sub a4, a4, a5 +; RV64M-NEXT: sub a3, a3, a5 ; RV64M-NEXT: add a3, a3, a4 ; RV64M-NEXT: addi a3, a3, -1 ; RV64M-NEXT: seqz a3, a3 @@ -690,8 +690,8 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vmv.x.s a2, v8 ; RV32MV-NEXT: andi a2, a2, 1 ; RV32MV-NEXT: slli a2, a2, 1 -; RV32MV-NEXT: or a1, a1, a2 ; RV32MV-NEXT: slli a0, a0, 2 +; RV32MV-NEXT: or a0, a2, a0 ; RV32MV-NEXT: or a0, a1, a0 ; RV32MV-NEXT: sw a0, 8(s2) ; RV32MV-NEXT: addi sp, s0, -64 @@ -743,7 +743,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: slli a4, a3, 3 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_2) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) -; RV64MV-NEXT: sub a3, a3, a4 +; RV64MV-NEXT: sub a2, a2, a4 ; RV64MV-NEXT: add a2, a2, a3 ; RV64MV-NEXT: sd a2, 8(sp) ; RV64MV-NEXT: mulh a2, a1, a5 @@ -751,8 +751,8 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: srai a2, a2, 1 ; RV64MV-NEXT: add a2, a2, a3 ; RV64MV-NEXT: slli a3, a2, 2 -; RV64MV-NEXT: add a2, a3, a2 ; RV64MV-NEXT: add a1, a1, a2 +; RV64MV-NEXT: add a1, a1, a3 ; RV64MV-NEXT: sd a1, 16(sp) ; RV64MV-NEXT: mv a1, sp ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index 304346f..cbb0a65 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -458,32 +458,32 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul t0, a6, a7 -; RV32IM-NEXT: sub a4, a4, t0 -; RV32IM-NEXT: mulh t0, a1, a5 -; RV32IM-NEXT: add t0, t0, a1 -; RV32IM-NEXT: srli t1, t0, 31 -; RV32IM-NEXT: srai t0, t0, 6 -; RV32IM-NEXT: add t0, t0, t1 -; RV32IM-NEXT: mul t1, t0, a7 -; RV32IM-NEXT: sub a1, a1, t1 -; RV32IM-NEXT: mulh t1, a3, a5 -; RV32IM-NEXT: add t1, t1, a3 +; RV32IM-NEXT: mulh t1, a1, a5 +; RV32IM-NEXT: add t1, t1, a1 ; RV32IM-NEXT: srli t2, t1, 31 ; RV32IM-NEXT: srai t1, t1, 6 ; RV32IM-NEXT: add t1, t1, t2 ; RV32IM-NEXT: mul t2, t1, a7 -; RV32IM-NEXT: sub a3, a3, t2 +; RV32IM-NEXT: mulh t3, a3, a5 +; RV32IM-NEXT: add t3, t3, a3 +; RV32IM-NEXT: srli t4, t3, 31 +; RV32IM-NEXT: srai t3, 
t3, 6 +; RV32IM-NEXT: add t3, t3, t4 +; RV32IM-NEXT: mul t4, t3, a7 ; RV32IM-NEXT: mulh a5, a2, a5 ; RV32IM-NEXT: add a5, a5, a2 -; RV32IM-NEXT: srli t2, a5, 31 +; RV32IM-NEXT: srli t5, a5, 31 ; RV32IM-NEXT: srai a5, a5, 6 -; RV32IM-NEXT: add a5, a5, t2 +; RV32IM-NEXT: add a5, a5, t5 ; RV32IM-NEXT: mul a7, a5, a7 -; RV32IM-NEXT: sub a2, a2, a7 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: add a3, a3, t1 -; RV32IM-NEXT: add a1, a1, t0 -; RV32IM-NEXT: add a4, a4, a6 +; RV32IM-NEXT: sub a5, a7, a5 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sub a5, t4, t3 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: sub a5, t2, t1 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sub a5, t0, a6 +; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: sh a1, 4(a0) ; RV32IM-NEXT: sh a3, 2(a0) @@ -575,36 +575,36 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: add a6, a6, a7 ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mulw t0, a6, a7 -; RV64IM-NEXT: subw a2, a2, t0 -; RV64IM-NEXT: mulh t0, a1, a3 -; RV64IM-NEXT: add t0, t0, a1 -; RV64IM-NEXT: srli t1, t0, 63 -; RV64IM-NEXT: srai t0, t0, 6 -; RV64IM-NEXT: add t0, t0, t1 -; RV64IM-NEXT: mulw t1, t0, a7 -; RV64IM-NEXT: subw a1, a1, t1 -; RV64IM-NEXT: mulh t1, a5, a3 -; RV64IM-NEXT: add t1, t1, a5 +; RV64IM-NEXT: mulh t1, a1, a3 +; RV64IM-NEXT: add t1, t1, a1 ; RV64IM-NEXT: srli t2, t1, 63 ; RV64IM-NEXT: srai t1, t1, 6 ; RV64IM-NEXT: add t1, t1, t2 ; RV64IM-NEXT: mulw t2, t1, a7 -; RV64IM-NEXT: subw a5, a5, t2 +; RV64IM-NEXT: mulh t3, a5, a3 +; RV64IM-NEXT: add t3, t3, a5 +; RV64IM-NEXT: srli t4, t3, 63 +; RV64IM-NEXT: srai t3, t3, 6 +; RV64IM-NEXT: add t3, t3, t4 +; RV64IM-NEXT: mulw t4, t3, a7 ; RV64IM-NEXT: mulh a3, a4, a3 ; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: srli t2, a3, 63 +; RV64IM-NEXT: srli t5, a3, 63 ; RV64IM-NEXT: srai a3, a3, 6 -; RV64IM-NEXT: add a3, a3, t2 +; RV64IM-NEXT: add a3, a3, t5 ; RV64IM-NEXT: mulw a7, a3, a7 -; RV64IM-NEXT: subw a4, a4, a7 -; RV64IM-NEXT: add a3, a4, a3 -; RV64IM-NEXT: add a5, a5, t1 -; RV64IM-NEXT: add a1, a1, t0 -; RV64IM-NEXT: add a2, a2, a6 +; RV64IM-NEXT: subw a3, a7, a3 +; RV64IM-NEXT: subw a4, a4, a3 +; RV64IM-NEXT: subw a3, t4, t3 +; RV64IM-NEXT: subw a5, a5, a3 +; RV64IM-NEXT: subw a3, t2, t1 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: subw a3, t0, a6 +; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: sh a1, 4(a0) ; RV64IM-NEXT: sh a5, 2(a0) -; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll index 9571ca3..ed3f240 100644 --- a/llvm/test/CodeGen/RISCV/ssub_sat.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll @@ -59,8 +59,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: sltu a1, a0, a2 -; RV32-NEXT: sub a5, a4, a3 -; RV32-NEXT: sub a1, a5, a1 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: sub a1, a4, a1 ; RV32-NEXT: xor a5, a4, a1 ; RV32-NEXT: xor a3, a4, a3 ; RV32-NEXT: and a3, a3, a5 diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll index 32e794fc..064c4f9 100644 --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -65,8 +65,8 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: sltu a1, a0, a4 -; RV32-NEXT: sub a3, a2, a5 -; RV32-NEXT: sub a1, a3, a1 +; RV32-NEXT: add a1, a5, a1 +; 
RV32-NEXT: sub a1, a2, a1 ; RV32-NEXT: xor a3, a2, a1 ; RV32-NEXT: xor a2, a2, a5 ; RV32-NEXT: and a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/uadd_sat.ll b/llvm/test/CodeGen/RISCV/uadd_sat.ll index dbcb68e..6da9efc 100644 --- a/llvm/test/CodeGen/RISCV/uadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/uadd_sat.ll @@ -47,10 +47,10 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind { define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: func2: ; RV32I: # %bb.0: -; RV32I-NEXT: add a3, a1, a3 ; RV32I-NEXT: add a2, a0, a2 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: add a3, a1, a3 ; RV32I-NEXT: beq a3, a1, .LBB1_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a0, a3, a1 @@ -70,10 +70,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; ; RV32IZbb-LABEL: func2: ; RV32IZbb: # %bb.0: -; RV32IZbb-NEXT: add a3, a1, a3 ; RV32IZbb-NEXT: add a2, a0, a2 ; RV32IZbb-NEXT: sltu a0, a2, a0 ; RV32IZbb-NEXT: add a3, a3, a0 +; RV32IZbb-NEXT: add a3, a1, a3 ; RV32IZbb-NEXT: beq a3, a1, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: ; RV32IZbb-NEXT: sltu a0, a3, a1 diff --git a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll index 7a8c4c0..9df1957 100644 --- a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll @@ -54,10 +54,10 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I-LABEL: func64: ; RV32I: # %bb.0: -; RV32I-NEXT: add a2, a1, a5 ; RV32I-NEXT: add a4, a0, a4 ; RV32I-NEXT: sltu a0, a4, a0 -; RV32I-NEXT: add a2, a2, a0 +; RV32I-NEXT: add a2, a5, a0 +; RV32I-NEXT: add a2, a1, a2 ; RV32I-NEXT: beq a2, a1, .LBB1_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a0, a2, a1 @@ -77,10 +77,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; ; RV32IZbb-LABEL: func64: ; RV32IZbb: # %bb.0: -; RV32IZbb-NEXT: add a2, a1, a5 ; RV32IZbb-NEXT: add a4, a0, a4 ; RV32IZbb-NEXT: sltu a0, a4, a0 -; RV32IZbb-NEXT: add a2, a2, a0 +; RV32IZbb-NEXT: add a2, a5, a0 +; RV32IZbb-NEXT: add a2, a1, a2 ; RV32IZbb-NEXT: beq a2, a1, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: ; RV32IZbb-NEXT: sltu a0, a2, a1 diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll index bf521e3..5e624d6 100644 --- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -10,19 +10,19 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 { ; RISCV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill -; RISCV32-NEXT: lw a3, 12(a1) +; RISCV32-NEXT: lw a4, 12(a1) ; RISCV32-NEXT: lw a7, 12(a2) ; RISCV32-NEXT: lw a6, 8(a1) -; RISCV32-NEXT: lw a4, 0(a2) +; RISCV32-NEXT: lw a3, 0(a2) ; RISCV32-NEXT: lw a5, 0(a1) ; RISCV32-NEXT: lw t3, 4(a1) ; RISCV32-NEXT: lw t0, 8(a2) ; RISCV32-NEXT: lw a2, 4(a2) -; RISCV32-NEXT: mulhu a1, a5, a4 -; RISCV32-NEXT: mul t1, t3, a4 +; RISCV32-NEXT: mulhu a1, a5, a3 +; RISCV32-NEXT: mul t1, t3, a3 ; RISCV32-NEXT: add a1, t1, a1 ; RISCV32-NEXT: sltu t1, a1, t1 -; RISCV32-NEXT: mulhu t2, t3, a4 +; RISCV32-NEXT: mulhu t2, t3, a3 ; RISCV32-NEXT: add t4, t2, t1 ; RISCV32-NEXT: mul t1, a5, a2 ; RISCV32-NEXT: add a1, t1, a1 @@ -33,65 +33,65 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 { ; RISCV32-NEXT: mul t6, t3, a2 ; RISCV32-NEXT: add s0, t6, t5 ; RISCV32-NEXT: mul t1, t0, a5 -; RISCV32-NEXT: mul s3, a6, a4 +; 
RISCV32-NEXT: mul s3, a6, a3 ; RISCV32-NEXT: add s4, s3, t1 ; RISCV32-NEXT: add t1, s0, s4 ; RISCV32-NEXT: sltu t2, t1, s0 -; RISCV32-NEXT: sltu s0, s0, t6 +; RISCV32-NEXT: sltu t6, s0, t6 ; RISCV32-NEXT: sltu t4, t5, t4 -; RISCV32-NEXT: mulhu t5, t3, a2 -; RISCV32-NEXT: add t4, t5, t4 -; RISCV32-NEXT: add s0, t4, s0 +; RISCV32-NEXT: mulhu s1, t3, a2 +; RISCV32-NEXT: add t4, t4, t6 +; RISCV32-NEXT: add s1, s1, t4 ; RISCV32-NEXT: mul t4, t3, t0 -; RISCV32-NEXT: mul t5, a7, a5 -; RISCV32-NEXT: add t4, t5, t4 -; RISCV32-NEXT: mulhu s1, t0, a5 -; RISCV32-NEXT: add s2, s1, t4 +; RISCV32-NEXT: mul s2, a7, a5 +; RISCV32-NEXT: mulhu s0, t0, a5 +; RISCV32-NEXT: add t4, s0, t4 +; RISCV32-NEXT: add s2, t4, s2 ; RISCV32-NEXT: mul t4, a2, a6 -; RISCV32-NEXT: mul t5, a3, a4 +; RISCV32-NEXT: mul t6, a4, a3 +; RISCV32-NEXT: mulhu t5, a6, a3 ; RISCV32-NEXT: add t4, t5, t4 -; RISCV32-NEXT: mulhu t5, a6, a4 -; RISCV32-NEXT: add t6, t5, t4 -; RISCV32-NEXT: add t4, t6, s2 -; RISCV32-NEXT: sltu s3, s4, s3 -; RISCV32-NEXT: add t4, t4, s3 -; RISCV32-NEXT: add t4, s0, t4 +; RISCV32-NEXT: add t6, t4, t6 +; RISCV32-NEXT: sltu t4, s4, s3 +; RISCV32-NEXT: add t4, s2, t4 +; RISCV32-NEXT: add t4, t6, t4 ; RISCV32-NEXT: add t4, t4, t2 -; RISCV32-NEXT: beq t4, s0, .LBB0_2 +; RISCV32-NEXT: add t4, s1, t4 +; RISCV32-NEXT: beq t4, s1, .LBB0_2 ; RISCV32-NEXT: # %bb.1: # %start -; RISCV32-NEXT: sltu t2, t4, s0 +; RISCV32-NEXT: sltu t2, t4, s1 ; RISCV32-NEXT: .LBB0_2: # %start -; RISCV32-NEXT: sltu s0, s2, s1 +; RISCV32-NEXT: sltu s0, s2, s0 ; RISCV32-NEXT: snez s1, t3 ; RISCV32-NEXT: snez s2, a7 ; RISCV32-NEXT: and s1, s2, s1 ; RISCV32-NEXT: mulhu s2, a7, a5 ; RISCV32-NEXT: snez s2, s2 -; RISCV32-NEXT: or s1, s1, s2 ; RISCV32-NEXT: mulhu t3, t3, t0 ; RISCV32-NEXT: snez t3, t3 -; RISCV32-NEXT: or t3, s1, t3 +; RISCV32-NEXT: or t3, s2, t3 ; RISCV32-NEXT: or t3, t3, s0 +; RISCV32-NEXT: or t3, s1, t3 ; RISCV32-NEXT: sltu t5, t6, t5 ; RISCV32-NEXT: snez t6, a2 -; RISCV32-NEXT: snez s0, a3 +; RISCV32-NEXT: snez s0, a4 ; RISCV32-NEXT: and t6, s0, t6 -; RISCV32-NEXT: mulhu s0, a3, a4 +; RISCV32-NEXT: mulhu s0, a4, a3 ; RISCV32-NEXT: snez s0, s0 -; RISCV32-NEXT: or t6, t6, s0 ; RISCV32-NEXT: mulhu a2, a2, a6 ; RISCV32-NEXT: snez a2, a2 -; RISCV32-NEXT: or a2, t6, a2 +; RISCV32-NEXT: or a2, s0, a2 ; RISCV32-NEXT: or a2, a2, t5 ; RISCV32-NEXT: or a7, t0, a7 ; RISCV32-NEXT: snez a7, a7 -; RISCV32-NEXT: or a3, a6, a3 -; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: and a3, a3, a7 -; RISCV32-NEXT: or a2, a3, a2 -; RISCV32-NEXT: or a2, a2, t3 -; RISCV32-NEXT: or a2, a2, t2 -; RISCV32-NEXT: mul a3, a5, a4 +; RISCV32-NEXT: or a4, a6, a4 +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: and a4, a4, a7 +; RISCV32-NEXT: or a2, a4, a2 +; RISCV32-NEXT: or a4, t6, t3 +; RISCV32-NEXT: or a4, a4, t2 +; RISCV32-NEXT: or a2, a2, a4 +; RISCV32-NEXT: mul a3, a5, a3 ; RISCV32-NEXT: andi a2, a2, 1 ; RISCV32-NEXT: sw a3, 0(a0) ; RISCV32-NEXT: sw a1, 4(a0) diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index d46e6c6..191eeab 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -40,11 +40,11 @@ define i24 @load_i24(ptr %p) { ; NOMISALIGN-LABEL: load_i24: ; NOMISALIGN: # %bb.0: ; NOMISALIGN-NEXT: lbu a1, 1(a0) -; NOMISALIGN-NEXT: lbu a2, 0(a0) -; NOMISALIGN-NEXT: lb a0, 2(a0) +; NOMISALIGN-NEXT: lb a2, 2(a0) +; NOMISALIGN-NEXT: lbu a0, 0(a0) ; NOMISALIGN-NEXT: slli a1, a1, 8 -; NOMISALIGN-NEXT: or a1, a1, a2 -; NOMISALIGN-NEXT: slli a0, a0, 
16 +; NOMISALIGN-NEXT: slli a2, a2, 16 +; NOMISALIGN-NEXT: or a0, a0, a2 ; NOMISALIGN-NEXT: or a0, a1, a0 ; NOMISALIGN-NEXT: ret ; @@ -70,7 +70,7 @@ define i32 @load_i32(ptr %p) { ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; @@ -84,7 +84,7 @@ define i32 @load_i32(ptr %p) { ; RV64I-NEXT: or a1, a1, a2 ; RV64I-NEXT: slli a3, a3, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -107,8 +107,8 @@ define i64 @load_i64(ptr %p) { ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: slli a3, a3, 16 ; RV32I-NEXT: slli a4, a4, 24 -; RV32I-NEXT: or a2, a4, a3 -; RV32I-NEXT: or a2, a2, a1 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a2, a4, a1 ; RV32I-NEXT: lbu a1, 5(a0) ; RV32I-NEXT: lbu a3, 4(a0) ; RV32I-NEXT: lbu a4, 6(a0) @@ -117,7 +117,7 @@ define i64 @load_i64(ptr %p) { ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret @@ -127,25 +127,25 @@ define i64 @load_i64(ptr %p) { ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 0(a0) ; RV64I-NEXT: lbu a3, 2(a0) -; RV64I-NEXT: lbu a4, 3(a0) ; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: or a1, a1, a2 ; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: slli a4, a4, 24 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: lbu a2, 5(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: lbu a3, 4(a0) -; RV64I-NEXT: lbu a4, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a2, a2, 8 +; RV64I-NEXT: lbu a5, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a2, a2, a3 -; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli a4, a4, 24 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a2, a5, a2 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: ret ; ; MISALIGN-RV32I-LABEL: load_i64: diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll index 715dedd..218281f 100644 --- a/llvm/test/CodeGen/RISCV/urem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll @@ -140,8 +140,8 @@ define i32 @combine_urem_udiv(i32 %x) nounwind { ; RV32IM-NEXT: srli a1, a1, 6 ; RV32IM-NEXT: li a2, 95 ; RV32IM-NEXT: mul a2, a1, a2 +; RV32IM-NEXT: sub a2, a2, a1 ; RV32IM-NEXT: sub a0, a0, a2 -; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_urem_udiv: @@ -180,8 +180,8 @@ define i32 @combine_urem_udiv(i32 %x) nounwind { ; RV64IM-NEXT: srli a1, a1, 6 ; RV64IM-NEXT: li a2, 95 ; RV64IM-NEXT: mulw a2, a1, a2 +; RV64IM-NEXT: subw a2, a2, a1 ; RV64IM-NEXT: subw a0, a0, a2 -; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = urem i32 %x, 95 %2 = udiv i32 %x, 95 diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index f6a2b8a..ccd5099 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -192,8 +192,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV32-LABEL: test_urem_odd_setne: ; RV32: # %bb.0: ; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: neg a0, a0 +; RV32-NEXT: sub a0, a0, a1 ; RV32-NEXT: andi a0, a0, 15 ; RV32-NEXT: 
sltiu a0, a0, 4 ; RV32-NEXT: xori a0, a0, 1 @@ -202,8 +202,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV64-LABEL: test_urem_odd_setne: ; RV64: # %bb.0: ; RV64-NEXT: slli a1, a0, 1 -; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: negw a0, a0 +; RV64-NEXT: subw a0, a0, a1 ; RV64-NEXT: andi a0, a0, 15 ; RV64-NEXT: sltiu a0, a0, 4 ; RV64-NEXT: xori a0, a0, 1 @@ -212,8 +212,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV32M-LABEL: test_urem_odd_setne: ; RV32M: # %bb.0: ; RV32M-NEXT: slli a1, a0, 1 -; RV32M-NEXT: add a0, a1, a0 ; RV32M-NEXT: neg a0, a0 +; RV32M-NEXT: sub a0, a0, a1 ; RV32M-NEXT: andi a0, a0, 15 ; RV32M-NEXT: sltiu a0, a0, 4 ; RV32M-NEXT: xori a0, a0, 1 @@ -222,8 +222,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV64M-LABEL: test_urem_odd_setne: ; RV64M: # %bb.0: ; RV64M-NEXT: slli a1, a0, 1 -; RV64M-NEXT: add a0, a1, a0 ; RV64M-NEXT: negw a0, a0 +; RV64M-NEXT: subw a0, a0, a1 ; RV64M-NEXT: andi a0, a0, 15 ; RV64M-NEXT: sltiu a0, a0, 4 ; RV64M-NEXT: xori a0, a0, 1 @@ -232,8 +232,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV32MV-LABEL: test_urem_odd_setne: ; RV32MV: # %bb.0: ; RV32MV-NEXT: slli a1, a0, 1 -; RV32MV-NEXT: add a0, a1, a0 ; RV32MV-NEXT: neg a0, a0 +; RV32MV-NEXT: sub a0, a0, a1 ; RV32MV-NEXT: andi a0, a0, 15 ; RV32MV-NEXT: sltiu a0, a0, 4 ; RV32MV-NEXT: xori a0, a0, 1 @@ -242,8 +242,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV64MV-LABEL: test_urem_odd_setne: ; RV64MV: # %bb.0: ; RV64MV-NEXT: slli a1, a0, 1 -; RV64MV-NEXT: add a0, a1, a0 ; RV64MV-NEXT: negw a0, a0 +; RV64MV-NEXT: subw a0, a0, a1 ; RV64MV-NEXT: andi a0, a0, 15 ; RV64MV-NEXT: sltiu a0, a0, 4 ; RV64MV-NEXT: xori a0, a0, 1 @@ -366,9 +366,9 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32-NEXT: andi a1, s3, 2047 ; RV32-NEXT: andi a0, a0, 2047 ; RV32-NEXT: slli a0, a0, 11 -; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: slli s1, s1, 22 ; RV32-NEXT: or a0, a0, s1 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: sw a0, 0(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -420,8 +420,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64-NEXT: andi a1, s3, 2047 ; RV64-NEXT: andi a2, s2, 2047 ; RV64-NEXT: slli a2, a2, 11 -; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: slli a0, a0, 22 +; RV64-NEXT: or a0, a2, a0 ; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: sw a0, 0(s0) ; RV64-NEXT: slli a0, a0, 31 @@ -471,8 +471,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32M-NEXT: andi a2, a2, 2047 ; RV32M-NEXT: andi a3, a3, 2047 ; RV32M-NEXT: slli a3, a3, 11 -; RV32M-NEXT: or a2, a2, a3 ; RV32M-NEXT: slli a1, a1, 22 +; RV32M-NEXT: or a1, a3, a1 ; RV32M-NEXT: or a1, a2, a1 ; RV32M-NEXT: sw a1, 0(a0) ; RV32M-NEXT: ret @@ -510,8 +510,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64M-NEXT: andi a1, a1, 2047 ; RV64M-NEXT: andi a3, a3, 2047 ; RV64M-NEXT: slli a3, a3, 11 -; RV64M-NEXT: or a1, a1, a3 ; RV64M-NEXT: slli a2, a2, 22 +; RV64M-NEXT: or a2, a3, a2 ; RV64M-NEXT: or a1, a1, a2 ; RV64M-NEXT: sw a1, 0(a0) ; RV64M-NEXT: slli a1, a1, 31 @@ -578,8 +578,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vmv.x.s a3, v8 ; RV32MV-NEXT: andi a3, a3, 2047 ; RV32MV-NEXT: slli a3, a3, 11 -; RV32MV-NEXT: or a2, a2, a3 ; RV32MV-NEXT: slli a1, a1, 22 +; RV32MV-NEXT: or a1, a3, a1 ; RV32MV-NEXT: or a1, a2, a1 ; RV32MV-NEXT: sw a1, 0(a0) ; RV32MV-NEXT: addi sp, sp, 16 @@ -638,10 +638,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: vmv.x.s a2, v9 ; RV64MV-NEXT: andi a2, a2, 2047 ; RV64MV-NEXT: slli 
a2, a2, 11 -; RV64MV-NEXT: or a1, a1, a2 ; RV64MV-NEXT: vslidedown.vi v8, v8, 2 -; RV64MV-NEXT: vmv.x.s a2, v8 -; RV64MV-NEXT: slli a2, a2, 22 +; RV64MV-NEXT: vmv.x.s a3, v8 +; RV64MV-NEXT: slli a3, a3, 22 +; RV64MV-NEXT: or a2, a2, a3 ; RV64MV-NEXT: or a1, a1, a2 ; RV64MV-NEXT: sw a1, 0(a0) ; RV64MV-NEXT: slli a1, a1, 31 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index f47907d..e8365fe 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -448,32 +448,32 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: srli a6, a6, 6 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul t0, a6, a7 -; RV32IM-NEXT: sub a4, a4, t0 -; RV32IM-NEXT: mulhu t0, a1, a5 -; RV32IM-NEXT: sub t1, a1, t0 -; RV32IM-NEXT: srli t1, t1, 1 -; RV32IM-NEXT: add t0, t1, t0 -; RV32IM-NEXT: srli t0, t0, 6 -; RV32IM-NEXT: mul t1, t0, a7 -; RV32IM-NEXT: sub a1, a1, t1 -; RV32IM-NEXT: mulhu t1, a3, a5 -; RV32IM-NEXT: sub t2, a3, t1 +; RV32IM-NEXT: mulhu t1, a1, a5 +; RV32IM-NEXT: sub t2, a1, t1 ; RV32IM-NEXT: srli t2, t2, 1 ; RV32IM-NEXT: add t1, t2, t1 ; RV32IM-NEXT: srli t1, t1, 6 ; RV32IM-NEXT: mul t2, t1, a7 -; RV32IM-NEXT: sub a3, a3, t2 +; RV32IM-NEXT: mulhu t3, a3, a5 +; RV32IM-NEXT: sub t4, a3, t3 +; RV32IM-NEXT: srli t4, t4, 1 +; RV32IM-NEXT: add t3, t4, t3 +; RV32IM-NEXT: srli t3, t3, 6 +; RV32IM-NEXT: mul t4, t3, a7 ; RV32IM-NEXT: mulhu a5, a2, a5 -; RV32IM-NEXT: sub t2, a2, a5 -; RV32IM-NEXT: srli t2, t2, 1 -; RV32IM-NEXT: add a5, t2, a5 +; RV32IM-NEXT: sub t5, a2, a5 +; RV32IM-NEXT: srli t5, t5, 1 +; RV32IM-NEXT: add a5, t5, a5 ; RV32IM-NEXT: srli a5, a5, 6 ; RV32IM-NEXT: mul a7, a5, a7 -; RV32IM-NEXT: sub a2, a2, a7 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: add a3, a3, t1 -; RV32IM-NEXT: add a1, a1, t0 -; RV32IM-NEXT: add a4, a4, a6 +; RV32IM-NEXT: sub a5, a7, a5 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sub a5, t4, t3 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: sub a5, t2, t1 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sub a5, t0, a6 +; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: sh a1, 4(a0) ; RV32IM-NEXT: sh a3, 2(a0) @@ -565,36 +565,36 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: srli a6, a6, 6 ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mulw t0, a6, a7 -; RV64IM-NEXT: subw a2, a2, t0 -; RV64IM-NEXT: mulhu t0, a1, a3 -; RV64IM-NEXT: sub t1, a1, t0 -; RV64IM-NEXT: srli t1, t1, 1 -; RV64IM-NEXT: add t0, t1, t0 -; RV64IM-NEXT: srli t0, t0, 6 -; RV64IM-NEXT: mulw t1, t0, a7 -; RV64IM-NEXT: subw a1, a1, t1 -; RV64IM-NEXT: mulhu t1, a5, a3 -; RV64IM-NEXT: sub t2, a5, t1 +; RV64IM-NEXT: mulhu t1, a1, a3 +; RV64IM-NEXT: sub t2, a1, t1 ; RV64IM-NEXT: srli t2, t2, 1 ; RV64IM-NEXT: add t1, t2, t1 ; RV64IM-NEXT: srli t1, t1, 6 ; RV64IM-NEXT: mulw t2, t1, a7 -; RV64IM-NEXT: subw a5, a5, t2 +; RV64IM-NEXT: mulhu t3, a5, a3 +; RV64IM-NEXT: sub t4, a5, t3 +; RV64IM-NEXT: srli t4, t4, 1 +; RV64IM-NEXT: add t3, t4, t3 +; RV64IM-NEXT: srli t3, t3, 6 +; RV64IM-NEXT: mulw t4, t3, a7 ; RV64IM-NEXT: mulhu a3, a4, a3 -; RV64IM-NEXT: sub t2, a4, a3 -; RV64IM-NEXT: srli t2, t2, 1 -; RV64IM-NEXT: add a3, t2, a3 +; RV64IM-NEXT: sub t5, a4, a3 +; RV64IM-NEXT: srli t5, t5, 1 +; RV64IM-NEXT: add a3, t5, a3 ; RV64IM-NEXT: srli a3, a3, 6 ; RV64IM-NEXT: mulw a7, a3, a7 -; RV64IM-NEXT: subw a4, a4, a7 -; RV64IM-NEXT: add a3, a4, a3 -; RV64IM-NEXT: add a5, a5, t1 -; RV64IM-NEXT: add a1, a1, t0 -; RV64IM-NEXT: add a2, a2, a6 +; RV64IM-NEXT: subw a3, 
a7, a3 +; RV64IM-NEXT: subw a4, a4, a3 +; RV64IM-NEXT: subw a3, t4, t3 +; RV64IM-NEXT: subw a5, a5, a3 +; RV64IM-NEXT: subw a3, t2, t1 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: subw a3, t0, a6 +; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: sh a1, 4(a0) ; RV64IM-NEXT: sh a5, 2(a0) -; RV64IM-NEXT: sh a3, 0(a0) +; RV64IM-NEXT: sh a4, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, diff --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll index aab5626..c828886 100644 --- a/llvm/test/CodeGen/RISCV/usub_sat.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat.ll @@ -46,8 +46,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: func2: ; RV32I: # %bb.0: ; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: add a3, a3, a4 ; RV32I-NEXT: sub a3, a1, a3 -; RV32I-NEXT: sub a3, a3, a4 ; RV32I-NEXT: sub a2, a0, a2 ; RV32I-NEXT: beq a3, a1, .LBB1_2 ; RV32I-NEXT: # %bb.1: @@ -72,8 +72,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32IZbb-LABEL: func2: ; RV32IZbb: # %bb.0: ; RV32IZbb-NEXT: sltu a4, a0, a2 +; RV32IZbb-NEXT: add a3, a3, a4 ; RV32IZbb-NEXT: sub a3, a1, a3 -; RV32IZbb-NEXT: sub a3, a3, a4 ; RV32IZbb-NEXT: sub a2, a0, a2 ; RV32IZbb-NEXT: beq a3, a1, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll index 6f868b3..50ec352 100644 --- a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll @@ -53,8 +53,8 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I-LABEL: func64: ; RV32I: # %bb.0: ; RV32I-NEXT: sltu a2, a0, a4 -; RV32I-NEXT: sub a3, a1, a5 -; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: add a2, a5, a2 +; RV32I-NEXT: sub a2, a1, a2 ; RV32I-NEXT: sub a3, a0, a4 ; RV32I-NEXT: beq a2, a1, .LBB1_2 ; RV32I-NEXT: # %bb.1: @@ -79,8 +79,8 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32IZbb-LABEL: func64: ; RV32IZbb: # %bb.0: ; RV32IZbb-NEXT: sltu a2, a0, a4 -; RV32IZbb-NEXT: sub a3, a1, a5 -; RV32IZbb-NEXT: sub a2, a3, a2 +; RV32IZbb-NEXT: add a2, a5, a2 +; RV32IZbb-NEXT: sub a2, a1, a2 ; RV32IZbb-NEXT: sub a3, a0, a4 ; RV32IZbb-NEXT: beq a2, a1, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index df8a670..69703a4 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -808,11 +808,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8 ; ILP32-ILP32F-FPELIM-NEXT: addi a3, sp, 27 ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 4(sp) -; ILP32-ILP32F-FPELIM-NEXT: lw a3, 4(a0) -; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0) -; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a3 -; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a0 +; ILP32-ILP32F-FPELIM-NEXT: lw a3, 0(a0) +; ILP32-ILP32F-FPELIM-NEXT: lw a4, 4(a0) +; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a3 ; ILP32-ILP32F-FPELIM-NEXT: sltu a1, a0, a1 +; ILP32-ILP32F-FPELIM-NEXT: add a1, a4, a1 ; ILP32-ILP32F-FPELIM-NEXT: add a1, a2, a1 ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 32 ; ILP32-ILP32F-FPELIM-NEXT: ret @@ -832,11 +832,11 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8 ; ILP32-ILP32F-WITHFP-NEXT: addi a3, s0, 19 ; ILP32-ILP32F-WITHFP-NEXT: sw a3, -12(s0) -; ILP32-ILP32F-WITHFP-NEXT: lw a3, 4(a0) -; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0) -; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a3 -; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a0 +; ILP32-ILP32F-WITHFP-NEXT: lw a3, 0(a0) +; ILP32-ILP32F-WITHFP-NEXT: lw a4, 4(a0) +; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a3 ; ILP32-ILP32F-WITHFP-NEXT: sltu a1, a0, a1 +; ILP32-ILP32F-WITHFP-NEXT: add a1, a4, a1 ; ILP32-ILP32F-WITHFP-NEXT: add a1, a2, a1 ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload @@ -855,11 +855,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, sp, 27 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 4(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 4(a0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a3 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 0(a0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a4, 4(a0) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a3 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sltu a1, a0, a1 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a4, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a2, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret @@ -951,7 +951,7 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: lw a4, 4(a0) ; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a3 ; ILP32-ILP32F-FPELIM-NEXT: sltu a1, a0, a1 -; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a4 +; ILP32-ILP32F-FPELIM-NEXT: add a1, a4, a1 ; ILP32-ILP32F-FPELIM-NEXT: add a1, a2, a1 ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 32 ; ILP32-ILP32F-FPELIM-NEXT: ret @@ -977,7 +977,7 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: lw a4, 4(a0) ; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a3 ; ILP32-ILP32F-WITHFP-NEXT: sltu a1, a0, a1 -; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a4 +; ILP32-ILP32F-WITHFP-NEXT: add a1, a4, a1 ; ILP32-ILP32F-WITHFP-NEXT: add a1, a2, a1 ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 16(sp) # 4-byte Folded Reload @@ -998,11 +998,11 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: fld ft0, 0(a0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: fsd ft0, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a0 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a3 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 8(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sltu a1, a0, a1 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a3, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a2, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret @@ -1164,8 +1164,8 @@ define i32 @va4_va_copy(i32 %argno, ...) 
nounwind { ; ILP32-ILP32F-FPELIM-NEXT: addi a3, a0, 4 ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0) -; ILP32-ILP32F-FPELIM-NEXT: add a1, a1, s0 -; ILP32-ILP32F-FPELIM-NEXT: add a1, a1, a2 +; ILP32-ILP32F-FPELIM-NEXT: add a2, s0, a2 +; ILP32-ILP32F-FPELIM-NEXT: add a0, a2, a0 ; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a0 ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; ILP32-ILP32F-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1207,8 +1207,8 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: addi a3, a0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a3, -16(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0) -; ILP32-ILP32F-WITHFP-NEXT: add a1, a1, s1 -; ILP32-ILP32F-WITHFP-NEXT: add a1, a1, a2 +; ILP32-ILP32F-WITHFP-NEXT: add a2, s1, a2 +; ILP32-ILP32F-WITHFP-NEXT: add a0, a2, a0 ; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a0 ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1249,8 +1249,8 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, a0, 4 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 4(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a1, s0 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a1, a2 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, s0, a2 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a2, a0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1290,8 +1290,8 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a0, 8 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: ld a0, 0(a0) -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, a1, s0 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, a1, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a2, s0, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a2, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: addw a0, a1, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1333,8 +1333,8 @@ define i32 @va4_va_copy(i32 %argno, ...) 
nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a0, 8 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld a0, 0(a0) -; LP64-LP64F-LP64D-WITHFP-NEXT: add a1, a1, s1 -; LP64-LP64F-LP64D-WITHFP-NEXT: add a1, a1, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: add a2, s1, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a2, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: addw a0, a1, a0 ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll index 075e6f9..3645aa5 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -14,7 +14,7 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: srlw a0, a0, a1 @@ -37,7 +37,7 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) @@ -47,7 +47,7 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: srl a0, a0, a1 @@ -78,7 +78,7 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sllw a0, a0, a1 @@ -101,7 +101,7 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) @@ -111,7 +111,7 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sll a0, a0, a1 @@ -142,7 +142,7 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sraw a0, a0, a1 @@ -165,7 +165,7 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) @@ -175,7 +175,7 @@ define void 
@ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sra a0, a0, a1 @@ -201,25 +201,25 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a4, 4(a1) ; RV64I-NEXT: lbu a5, 6(a1) @@ -228,8 +228,8 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) ; RV64I-NEXT: lbu a6, 2(a1) @@ -238,7 +238,7 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a4, a6, a4 ; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a3, a3, 35 @@ -266,25 +266,25 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a3, 5(a0) ; RV32I-NEXT: lbu a4, 4(a0) ; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: lbu a3, 2(a1) +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 7(a0) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: addi a3, a1, -32 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: bltz a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a0, a4, a3 @@ -298,7 +298,7 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: slli a5, a4, 1 @@ 
-338,25 +338,25 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a4, 4(a1) ; RV64I-NEXT: lbu a5, 6(a1) @@ -365,8 +365,8 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) ; RV64I-NEXT: lbu a6, 2(a1) @@ -375,7 +375,7 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a4, a6, a4 ; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a3, a3, 35 @@ -403,25 +403,25 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: lbu a3, 2(a1) +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: addi a3, a1, -32 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: bltz a3, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sll a0, a4, a3 @@ -435,7 +435,7 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: srli a5, a4, 1 @@ -475,25 +475,25 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; 
RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a6, 3(a0) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a4, 4(a1) ; RV64I-NEXT: lbu a5, 6(a1) @@ -502,8 +502,8 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) ; RV64I-NEXT: lbu a6, 2(a1) @@ -512,7 +512,7 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a4, a6, a4 ; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a3, a3, 35 @@ -539,29 +539,29 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a3, 5(a0) ; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a5, 6(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: lbu a5, 1(a1) -; RV32I-NEXT: or a6, a3, a4 -; RV32I-NEXT: lbu a3, 6(a0) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: lbu a4, 1(a1) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lbu a5, 0(a1) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: or a5, a4, a5 +; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: slli a4, a4, 24 -; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: slli a3, a1, 3 -; RV32I-NEXT: addi a5, a3, -32 -; RV32I-NEXT: or a1, a7, a6 +; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: addi a5, a1, -32 +; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: bltz a5, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra a0, a1, a5 +; RV32I-NEXT: sra a0, a3, a5 ; RV32I-NEXT: srai a1, a4, 31 ; RV32I-NEXT: j .LBB5_3 ; RV32I-NEXT: .LBB5_2: @@ -573,14 +573,14 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a4, a6, a4 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: srl a0, a0, a3 -; RV32I-NEXT: slli a4, a1, 1 -; RV32I-NEXT: xori a5, a3, 31 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: slli a4, a3, 1 +; RV32I-NEXT: xori a5, a1, 31 ; RV32I-NEXT: sll a4, a4, a5 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: sra a1, a1, a3 +; RV32I-NEXT: sra a1, a3, a1 ; RV32I-NEXT: .LBB5_3: ; RV32I-NEXT: sb a1, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 @@ -611,49 +611,49 @@ define void @lshr_16bytes(ptr %src.ptr, 
ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 9(a0) ; RV64I-NEXT: lbu a4, 8(a0) ; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a4, a4, a3 -; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: lbu a4, 11(a0) ; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a6, 4(a1) ; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu t0, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: or a3, t0, a3 ; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: lbu t0, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli a4, a4, 24 ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: slli a7, a3, 32 +; RV64I-NEXT: or a6, t0, a6 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: slli a5, a5, 35 -; RV64I-NEXT: or a1, a5, a1 +; RV64I-NEXT: slli a3, a3, 35 +; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: addi a3, a1, -64 -; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: bltz a3, .LBB6_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: srl a0, a4, a3 @@ -662,25 +662,25 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a5, 1(a0) ; RV64I-NEXT: lbu a6, 0(a0) ; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: or a5, a7, a5 ; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: srl a0, a0, a1 ; RV64I-NEXT: xori a5, a1, 63 ; RV64I-NEXT: slli a6, a4, 1 @@ -754,8 +754,8 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; 
RV32I-NEXT: lbu a4, 9(a0) ; RV32I-NEXT: lbu a5, 8(a0) ; RV32I-NEXT: lbu a6, 10(a0) @@ -764,8 +764,8 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -774,7 +774,7 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a6, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: slli a4, a0, 3 ; RV32I-NEXT: addi t1, a4, -64 @@ -910,49 +910,49 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a4, a4, a3 -; RV64I-NEXT: lbu a3, 5(a0) -; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: lbu a4, 3(a0) ; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 5(a1) +; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a6, 4(a0) +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu t0, 7(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: lbu a3, 5(a1) ; RV64I-NEXT: lbu a6, 4(a1) ; RV64I-NEXT: lbu a7, 6(a1) ; RV64I-NEXT: lbu t0, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: or a3, t0, a3 ; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: lbu t0, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli a4, a4, 24 ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: slli a7, a3, 32 +; RV64I-NEXT: or a6, t0, a6 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: slli a5, a5, 35 -; RV64I-NEXT: or a1, a5, a1 +; RV64I-NEXT: slli a3, a3, 35 +; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: addi a3, a1, -64 -; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: bltz a3, .LBB7_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sll a0, a4, a3 @@ -961,25 +961,25 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a5, 9(a0) ; RV64I-NEXT: lbu a6, 8(a0) ; RV64I-NEXT: lbu a7, 10(a0) -; RV64I-NEXT: lbu t0, 11(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: lbu t0, 11(a0) +; RV64I-NEXT: or a5, a7, a5 ; RV64I-NEXT: lbu a7, 12(a0) -; 
RV64I-NEXT: lbu t0, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t1, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) ; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: sll a0, a0, a1 ; RV64I-NEXT: xori a5, a1, 63 ; RV64I-NEXT: srli a6, a4, 1 @@ -1053,8 +1053,8 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) ; RV32I-NEXT: lbu a6, 6(a0) @@ -1063,8 +1063,8 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -1073,7 +1073,7 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a6, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: slli a4, a0, 3 ; RV32I-NEXT: addi t1, a4, -64 @@ -1209,23 +1209,22 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 9(a0) ; RV64I-NEXT: lbu a4, 8(a0) ; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: lbu a3, 13(a0) +; RV64I-NEXT: lbu a4, 12(a0) ; RV64I-NEXT: lbu a6, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: lbu a5, 5(a1) ; RV64I-NEXT: lbu a6, 4(a1) ; RV64I-NEXT: lbu a7, 6(a1) @@ -1234,58 +1233,59 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: or a5, a7, a5 ; RV64I-NEXT: lbu a7, 0(a1) -; RV64I-NEXT: lbu t0, 2(a1) -; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t0, 2(a1) ; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 11(a0) +; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: slli a7, a7, 24 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: slli a7, a4, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a5, a5, 35 ; RV64I-NEXT: 
or a1, a5, a1 ; RV64I-NEXT: addi a5, a1, -64 -; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: or a4, a4, a7 ; RV64I-NEXT: bltz a5, .LBB8_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sra a0, a3, a5 -; RV64I-NEXT: sraiw a1, a4, 31 +; RV64I-NEXT: sra a0, a4, a5 +; RV64I-NEXT: sraiw a1, a3, 31 ; RV64I-NEXT: j .LBB8_3 ; RV64I-NEXT: .LBB8_2: -; RV64I-NEXT: lbu a4, 1(a0) +; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a5, 0(a0) ; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu a7, 3(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a7, 3(a0) +; RV64I-NEXT: or a3, a6, a3 ; RV64I-NEXT: lbu a6, 4(a0) -; RV64I-NEXT: lbu a7, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: srl a0, a0, a1 -; RV64I-NEXT: xori a4, a1, 63 -; RV64I-NEXT: slli a5, a3, 1 -; RV64I-NEXT: sll a4, a5, a4 -; RV64I-NEXT: or a0, a0, a4 -; RV64I-NEXT: sra a1, a3, a1 +; RV64I-NEXT: xori a3, a1, 63 +; RV64I-NEXT: slli a5, a4, 1 +; RV64I-NEXT: sll a3, a5, a3 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: sra a1, a4, a1 ; RV64I-NEXT: .LBB8_3: ; RV64I-NEXT: sb a1, 8(a2) ; RV64I-NEXT: srli a3, a1, 56 @@ -1351,8 +1351,8 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a4, t0, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, t0, a3 ; RV32I-NEXT: lbu a4, 9(a0) ; RV32I-NEXT: lbu a5, 8(a0) ; RV32I-NEXT: lbu a6, 10(a0) @@ -1361,8 +1361,8 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -1371,7 +1371,7 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a6, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: slli a4, a0, 3 ; RV32I-NEXT: addi t3, a4, -64 @@ -1530,8 +1530,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a5, a6, a3 ; RV64I-NEXT: lbu a3, 13(a0) ; RV64I-NEXT: lbu a4, 12(a0) ; RV64I-NEXT: lbu a6, 14(a0) @@ -1540,112 +1540,112 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or t3, a7, a3 ; 
RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a7, a7, a3 ; RV64I-NEXT: lbu a3, 5(a0) ; RV64I-NEXT: lbu a4, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu t1, 7(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu t0, 7(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: lbu a4, 25(a0) +; RV64I-NEXT: lbu a6, 24(a0) ; RV64I-NEXT: lbu t1, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or t4, t0, a3 +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a3, a4, a6 ; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 29(a0) -; RV64I-NEXT: lbu t1, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) -; RV64I-NEXT: lbu t3, 31(a0) +; RV64I-NEXT: lbu a6, 28(a0) +; RV64I-NEXT: lbu t0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: lbu a6, 30(a0) +; RV64I-NEXT: lbu t1, 31(a0) +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a4, a6, a4 ; RV64I-NEXT: or a4, t1, a4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 17(a0) -; RV64I-NEXT: lbu t1, 16(a0) -; RV64I-NEXT: lbu t2, 18(a0) -; RV64I-NEXT: lbu t3, 19(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu t1, 18(a0) +; RV64I-NEXT: or a3, a3, t0 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: lbu t0, 20(a0) ; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 21(a0) -; RV64I-NEXT: lbu t2, 20(a0) -; RV64I-NEXT: lbu t3, 22(a0) +; RV64I-NEXT: lbu t1, 19(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: lbu t0, 22(a0) ; RV64I-NEXT: lbu a0, 23(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 32 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a4, a0, t1 ; RV64I-NEXT: lbu a0, 5(a1) -; RV64I-NEXT: lbu t1, 4(a1) -; RV64I-NEXT: lbu t2, 6(a1) -; RV64I-NEXT: lbu t3, 7(a1) +; RV64I-NEXT: lbu a6, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu t1, 7(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; 
RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: lbu t4, 0(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: lbu t3, 2(a1) -; RV64I-NEXT: lbu t4, 3(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 3 -; RV64I-NEXT: slli t2, t2, 35 -; RV64I-NEXT: or a6, t2, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a0, t0, a0 +; RV64I-NEXT: or a0, t1, a0 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: slli a0, a0, 35 +; RV64I-NEXT: or a6, a0, a1 ; RV64I-NEXT: addiw t1, a6, -128 ; RV64I-NEXT: addi t2, a6, -192 ; RV64I-NEXT: slli t0, a3, 1 ; RV64I-NEXT: bltz t2, .LBB9_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srl t3, a3, t2 +; RV64I-NEXT: srl t5, a3, t2 ; RV64I-NEXT: j .LBB9_3 ; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: srl t3, a4, t1 -; RV64I-NEXT: xori t4, t1, 63 -; RV64I-NEXT: sll t4, t0, t4 -; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: srl a0, a4, t1 +; RV64I-NEXT: xori a1, t1, 63 +; RV64I-NEXT: sll a1, t0, a1 +; RV64I-NEXT: or t5, a0, a1 ; RV64I-NEXT: .LBB9_3: -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a0, t3, a5 +; RV64I-NEXT: or a1, t4, a7 ; RV64I-NEXT: addi a7, a6, -64 ; RV64I-NEXT: xori a5, a6, 63 ; RV64I-NEXT: bltz a7, .LBB9_5 @@ -1653,32 +1653,32 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srl s1, a0, a7 ; RV64I-NEXT: j .LBB9_6 ; RV64I-NEXT: .LBB9_5: -; RV64I-NEXT: srl t4, a1, a6 -; RV64I-NEXT: slli t5, a0, 1 -; RV64I-NEXT: sll t5, t5, a5 -; RV64I-NEXT: or s1, t4, t5 +; RV64I-NEXT: srl t3, a1, a6 +; RV64I-NEXT: slli t4, a0, 1 +; RV64I-NEXT: sll t4, t4, a5 +; RV64I-NEXT: or s1, t3, t4 ; RV64I-NEXT: .LBB9_6: -; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: sll t4, a4, t6 +; RV64I-NEXT: negw t3, a6 +; RV64I-NEXT: sll t4, a4, t3 ; RV64I-NEXT: li s0, 64 -; RV64I-NEXT: li t5, 128 +; RV64I-NEXT: li t6, 128 ; RV64I-NEXT: sub s0, s0, a6 -; RV64I-NEXT: bltu a6, t5, .LBB9_12 +; RV64I-NEXT: bltu a6, t6, .LBB9_12 ; RV64I-NEXT: # %bb.7: ; RV64I-NEXT: bnez a6, .LBB9_13 ; RV64I-NEXT: .LBB9_8: ; RV64I-NEXT: bgez s0, .LBB9_10 ; RV64I-NEXT: .LBB9_9: -; RV64I-NEXT: sll t3, a3, t6 +; RV64I-NEXT: sll t3, a3, t3 ; RV64I-NEXT: srli t4, a4, 1 -; RV64I-NEXT: subw t6, t5, a6 -; RV64I-NEXT: xori t6, t6, 63 -; RV64I-NEXT: srl t4, t4, t6 +; RV64I-NEXT: subw t5, t6, a6 +; RV64I-NEXT: xori t5, t5, 63 +; RV64I-NEXT: srl t4, t4, t5 ; RV64I-NEXT: or t4, t3, t4 ; RV64I-NEXT: .LBB9_10: ; RV64I-NEXT: slti t3, a7, 0 ; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: bltu a6, t5, .LBB9_14 +; RV64I-NEXT: bltu a6, t6, .LBB9_14 ; RV64I-NEXT: # %bb.11: ; RV64I-NEXT: srl t1, a3, t1 ; RV64I-NEXT: slti t2, t2, 0 @@ -1687,13 +1687,13 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bnez a6, .LBB9_15 ; RV64I-NEXT: j .LBB9_16 ; RV64I-NEXT: .LBB9_12: -; RV64I-NEXT: slti t3, s0, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, s1, t3 +; RV64I-NEXT: slti t5, s0, 0 +; RV64I-NEXT: neg t5, t5 +; RV64I-NEXT: and t5, t5, t4 +; 
RV64I-NEXT: or t5, s1, t5 ; RV64I-NEXT: beqz a6, .LBB9_8 ; RV64I-NEXT: .LBB9_13: -; RV64I-NEXT: mv a1, t3 +; RV64I-NEXT: mv a1, t5 ; RV64I-NEXT: bltz s0, .LBB9_9 ; RV64I-NEXT: j .LBB9_10 ; RV64I-NEXT: .LBB9_14: @@ -1824,8 +1824,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, s7, a3 ; RV32I-NEXT: lbu a4, 17(a0) ; RV32I-NEXT: lbu a5, 16(a0) ; RV32I-NEXT: lbu s8, 18(a0) @@ -1843,8 +1843,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s8 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, s8, a4 +; RV32I-NEXT: or a4, s10, a4 ; RV32I-NEXT: lbu a5, 25(a0) ; RV32I-NEXT: lbu s8, 24(a0) ; RV32I-NEXT: lbu s10, 26(a0) @@ -1853,7 +1853,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, s8 ; RV32I-NEXT: slli s10, s10, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 +; RV32I-NEXT: or a5, s10, a5 ; RV32I-NEXT: or ra, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) @@ -1863,7 +1863,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s8 +; RV32I-NEXT: or a0, s8, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: slli a0, a0, 3 ; RV32I-NEXT: addi a5, a0, -192 @@ -2406,8 +2406,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a5, a6, a3 ; RV64I-NEXT: lbu a3, 21(a0) ; RV64I-NEXT: lbu a4, 20(a0) ; RV64I-NEXT: lbu a6, 22(a0) @@ -2416,112 +2416,112 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or t3, a7, a3 ; RV64I-NEXT: lbu a3, 25(a0) ; RV64I-NEXT: lbu a4, 24(a0) -; RV64I-NEXT: lbu a7, 26(a0) -; RV64I-NEXT: lbu t0, 27(a0) +; RV64I-NEXT: lbu a6, 26(a0) +; RV64I-NEXT: lbu a7, 27(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a7, a7, a3 ; RV64I-NEXT: lbu a3, 29(a0) ; RV64I-NEXT: lbu a4, 28(a0) -; RV64I-NEXT: lbu t0, 30(a0) -; RV64I-NEXT: lbu t1, 31(a0) +; RV64I-NEXT: lbu a6, 30(a0) +; RV64I-NEXT: lbu t0, 31(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: lbu a4, 1(a0) +; RV64I-NEXT: lbu a6, 0(a0) ; RV64I-NEXT: lbu t1, 2(a0) -; RV64I-NEXT: lbu t2, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or t4, t0, a3 +; 
RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a3, a4, a6 ; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a0) -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 6(a0) -; RV64I-NEXT: lbu t3, 7(a0) +; RV64I-NEXT: lbu a6, 4(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: or a3, t1, a3 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu t1, 7(a0) +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a4, a6, a4 ; RV64I-NEXT: or a4, t1, a4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 9(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: lbu t2, 10(a0) -; RV64I-NEXT: lbu t3, 11(a0) +; RV64I-NEXT: lbu a6, 8(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: or a3, a3, t0 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: lbu t0, 12(a0) ; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 13(a0) -; RV64I-NEXT: lbu t2, 12(a0) -; RV64I-NEXT: lbu t3, 14(a0) +; RV64I-NEXT: lbu t1, 11(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 32 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a4, a0, t1 ; RV64I-NEXT: lbu a0, 5(a1) -; RV64I-NEXT: lbu t1, 4(a1) -; RV64I-NEXT: lbu t2, 6(a1) -; RV64I-NEXT: lbu t3, 7(a1) +; RV64I-NEXT: lbu a6, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu t1, 7(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: lbu t4, 0(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: lbu t3, 2(a1) -; RV64I-NEXT: lbu t4, 3(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 3 -; RV64I-NEXT: slli t2, t2, 35 -; RV64I-NEXT: or a6, t2, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a0, t0, a0 +; RV64I-NEXT: or a0, t1, a0 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: slli a0, a0, 35 +; RV64I-NEXT: or a6, a0, a1 ; RV64I-NEXT: addiw t1, a6, -128 ; RV64I-NEXT: addi t2, a6, -192 ; RV64I-NEXT: srli t0, a3, 1 ; RV64I-NEXT: bltz t2, .LBB10_2 ; RV64I-NEXT: # %bb.1: -; 
RV64I-NEXT: sll t3, a3, t2 +; RV64I-NEXT: sll t5, a3, t2 ; RV64I-NEXT: j .LBB10_3 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: sll t3, a4, t1 -; RV64I-NEXT: xori t4, t1, 63 -; RV64I-NEXT: srl t4, t0, t4 -; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: sll a0, a4, t1 +; RV64I-NEXT: xori a1, t1, 63 +; RV64I-NEXT: srl a1, t0, a1 +; RV64I-NEXT: or t5, a0, a1 ; RV64I-NEXT: .LBB10_3: -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a0, t3, a5 +; RV64I-NEXT: or a1, t4, a7 ; RV64I-NEXT: addi a7, a6, -64 ; RV64I-NEXT: xori a5, a6, 63 ; RV64I-NEXT: bltz a7, .LBB10_5 @@ -2529,32 +2529,32 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sll s1, a0, a7 ; RV64I-NEXT: j .LBB10_6 ; RV64I-NEXT: .LBB10_5: -; RV64I-NEXT: sll t4, a1, a6 -; RV64I-NEXT: srli t5, a0, 1 -; RV64I-NEXT: srl t5, t5, a5 -; RV64I-NEXT: or s1, t4, t5 +; RV64I-NEXT: sll t3, a1, a6 +; RV64I-NEXT: srli t4, a0, 1 +; RV64I-NEXT: srl t4, t4, a5 +; RV64I-NEXT: or s1, t3, t4 ; RV64I-NEXT: .LBB10_6: -; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: srl t4, a4, t6 +; RV64I-NEXT: negw t3, a6 +; RV64I-NEXT: srl t4, a4, t3 ; RV64I-NEXT: li s0, 64 -; RV64I-NEXT: li t5, 128 +; RV64I-NEXT: li t6, 128 ; RV64I-NEXT: sub s0, s0, a6 -; RV64I-NEXT: bltu a6, t5, .LBB10_12 +; RV64I-NEXT: bltu a6, t6, .LBB10_12 ; RV64I-NEXT: # %bb.7: ; RV64I-NEXT: bnez a6, .LBB10_13 ; RV64I-NEXT: .LBB10_8: ; RV64I-NEXT: bgez s0, .LBB10_10 ; RV64I-NEXT: .LBB10_9: -; RV64I-NEXT: srl t3, a3, t6 +; RV64I-NEXT: srl t3, a3, t3 ; RV64I-NEXT: slli t4, a4, 1 -; RV64I-NEXT: subw t6, t5, a6 -; RV64I-NEXT: xori t6, t6, 63 -; RV64I-NEXT: sll t4, t4, t6 +; RV64I-NEXT: subw t5, t6, a6 +; RV64I-NEXT: xori t5, t5, 63 +; RV64I-NEXT: sll t4, t4, t5 ; RV64I-NEXT: or t4, t3, t4 ; RV64I-NEXT: .LBB10_10: ; RV64I-NEXT: slti t3, a7, 0 ; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: bltu a6, t5, .LBB10_14 +; RV64I-NEXT: bltu a6, t6, .LBB10_14 ; RV64I-NEXT: # %bb.11: ; RV64I-NEXT: sll t1, a3, t1 ; RV64I-NEXT: slti t2, t2, 0 @@ -2563,13 +2563,13 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bnez a6, .LBB10_15 ; RV64I-NEXT: j .LBB10_16 ; RV64I-NEXT: .LBB10_12: -; RV64I-NEXT: slti t3, s0, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, s1, t3 +; RV64I-NEXT: slti t5, s0, 0 +; RV64I-NEXT: neg t5, t5 +; RV64I-NEXT: and t5, t5, t4 +; RV64I-NEXT: or t5, s1, t5 ; RV64I-NEXT: beqz a6, .LBB10_8 ; RV64I-NEXT: .LBB10_13: -; RV64I-NEXT: mv a1, t3 +; RV64I-NEXT: mv a1, t5 ; RV64I-NEXT: bltz s0, .LBB10_9 ; RV64I-NEXT: j .LBB10_10 ; RV64I-NEXT: .LBB10_14: @@ -2700,8 +2700,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, s7, a3 ; RV32I-NEXT: lbu a4, 13(a0) ; RV32I-NEXT: lbu a5, 12(a0) ; RV32I-NEXT: lbu s7, 14(a0) @@ -2719,8 +2719,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli s7, s7, 16 ; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s7 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, s7, a4 +; RV32I-NEXT: or a4, s10, a4 ; RV32I-NEXT: lbu a5, 5(a0) ; RV32I-NEXT: lbu s7, 4(a0) ; RV32I-NEXT: lbu s10, 6(a0) @@ -2729,7 +2729,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, s7 ; RV32I-NEXT: slli s10, s10, 16 ; RV32I-NEXT: 
slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 +; RV32I-NEXT: or a5, s10, a5 ; RV32I-NEXT: or s10, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) @@ -2739,7 +2739,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: slli s7, s7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s7 +; RV32I-NEXT: or a0, s7, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: slli a0, a0, 3 ; RV32I-NEXT: addi a5, a0, -192 @@ -3287,8 +3287,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a7, a6, a3 ; RV64I-NEXT: lbu a3, 13(a0) ; RV64I-NEXT: lbu a4, 12(a0) ; RV64I-NEXT: lbu a5, 14(a0) @@ -3297,112 +3297,112 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or t0, a6, a3 +; RV64I-NEXT: slli t0, t0, 32 ; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a5, 0(a0) -; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or t1, a5, a3 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or t2, a6, a3 ; RV64I-NEXT: lbu a3, 5(a0) -; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu t0, 7(a0) +; RV64I-NEXT: lbu a4, 4(a0) +; RV64I-NEXT: lbu a5, 6(a0) +; RV64I-NEXT: lbu a6, 7(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or t0, a5, a3 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or t5, a6, a3 ; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a5, 24(a0) -; RV64I-NEXT: lbu a6, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) +; RV64I-NEXT: lbu a4, 24(a0) +; RV64I-NEXT: lbu a5, 26(a0) +; RV64I-NEXT: lbu a6, 27(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a5, t2, a6 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a4, a6, 24 ; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 29(a0) ; RV64I-NEXT: lbu a6, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) +; RV64I-NEXT: lbu t1, 30(a0) ; RV64I-NEXT: lbu t3, 31(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or a6, t3, t2 -; RV64I-NEXT: or a6, a6, a5 +; RV64I-NEXT: or a5, t1, a5 +; RV64I-NEXT: or a6, t3, a5 ; RV64I-NEXT: slli a5, a6, 32 ; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 17(a0) -; RV64I-NEXT: lbu t2, 16(a0) +; RV64I-NEXT: lbu t1, 16(a0) ; RV64I-NEXT: lbu t3, 18(a0) -; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a4, a5, t1 ; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 
24 -; RV64I-NEXT: or t2, t4, t3 -; RV64I-NEXT: or a5, t2, a5 -; RV64I-NEXT: lbu t2, 21(a0) -; RV64I-NEXT: lbu t3, 20(a0) -; RV64I-NEXT: lbu t4, 22(a0) +; RV64I-NEXT: lbu a5, 21(a0) +; RV64I-NEXT: lbu t1, 20(a0) +; RV64I-NEXT: or a4, t3, a4 +; RV64I-NEXT: lbu t3, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, t1 +; RV64I-NEXT: lbu t1, 22(a0) ; RV64I-NEXT: lbu a0, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t2, t2, t3 -; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: slli t5, t5, 32 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t4 -; RV64I-NEXT: or a0, a0, t2 +; RV64I-NEXT: or a5, t1, a5 +; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a5, a0, a5 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a5, a0, t3 ; RV64I-NEXT: lbu a0, 5(a1) -; RV64I-NEXT: lbu t2, 4(a1) -; RV64I-NEXT: lbu t3, 6(a1) -; RV64I-NEXT: lbu t4, 7(a1) +; RV64I-NEXT: lbu a4, 4(a1) +; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu t3, 7(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t2 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or a0, t1, a0 +; RV64I-NEXT: or a0, t3, a0 +; RV64I-NEXT: lbu a4, 1(a1) +; RV64I-NEXT: lbu t1, 0(a1) +; RV64I-NEXT: lbu t3, 2(a1) +; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, t1 ; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: lbu t5, 0(a1) -; RV64I-NEXT: or t3, t4, t3 -; RV64I-NEXT: or t3, t3, a0 -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t2, t2, t5 -; RV64I-NEXT: lbu t4, 2(a1) -; RV64I-NEXT: lbu t5, 3(a1) -; RV64I-NEXT: slli a0, a4, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: slli t5, t5, 24 -; RV64I-NEXT: or a4, t5, t4 -; RV64I-NEXT: or a4, a4, t2 -; RV64I-NEXT: slli a4, a4, 3 -; RV64I-NEXT: slli t3, t3, 35 +; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a4, t3, a4 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: slli a0, a0, 35 +; RV64I-NEXT: or a4, a0, a1 ; RV64I-NEXT: addiw t3, a4, -128 ; RV64I-NEXT: addi t4, a4, -192 -; RV64I-NEXT: slli t0, a3, 1 +; RV64I-NEXT: slli t1, a3, 1 ; RV64I-NEXT: bltz t4, .LBB11_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sra t6, a3, t4 ; RV64I-NEXT: j .LBB11_3 ; RV64I-NEXT: .LBB11_2: -; RV64I-NEXT: srl t2, a5, t3 -; RV64I-NEXT: xori t5, t3, 63 -; RV64I-NEXT: sll t5, t0, t5 -; RV64I-NEXT: or t6, t2, t5 +; RV64I-NEXT: srl a0, a5, t3 +; RV64I-NEXT: xori a1, t3, 63 +; RV64I-NEXT: sll a1, t1, a1 +; RV64I-NEXT: or t6, a0, a1 ; RV64I-NEXT: .LBB11_3: -; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a0, t0, a7 +; RV64I-NEXT: or a1, t5, t2 ; RV64I-NEXT: addi a7, a4, -64 ; RV64I-NEXT: xori t2, a4, 63 ; RV64I-NEXT: bltz a7, .LBB11_5 @@ -3410,17 +3410,17 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srl s2, a0, a7 ; RV64I-NEXT: j .LBB11_6 ; RV64I-NEXT: .LBB11_5: -; RV64I-NEXT: srl t1, a1, a4 +; RV64I-NEXT: srl t0, a1, a4 ; RV64I-NEXT: slli t5, a0, 1 ; RV64I-NEXT: sll t5, t5, t2 -; RV64I-NEXT: or s2, t1, t5 +; RV64I-NEXT: or s2, t0, t5 ; RV64I-NEXT: .LBB11_6: ; RV64I-NEXT: negw s0, a4 ; RV64I-NEXT: sll t5, a5, s0 ; RV64I-NEXT: li s1, 64 -; RV64I-NEXT: li t1, 128 +; RV64I-NEXT: li t0, 128 ; RV64I-NEXT: sub s1, s1, a4 -; RV64I-NEXT: bltu a4, t1, .LBB11_11 +; RV64I-NEXT: bltu a4, t0, .LBB11_11 ; RV64I-NEXT: # %bb.7: ; RV64I-NEXT: bnez a4, .LBB11_12 ; RV64I-NEXT: 
.LBB11_8: @@ -3430,7 +3430,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bltz t4, .LBB11_14 ; RV64I-NEXT: .LBB11_10: ; RV64I-NEXT: mv t3, a6 -; RV64I-NEXT: bltu a4, t1, .LBB11_15 +; RV64I-NEXT: bltu a4, t0, .LBB11_15 ; RV64I-NEXT: j .LBB11_16 ; RV64I-NEXT: .LBB11_11: ; RV64I-NEXT: slti t6, s1, 0 @@ -3444,7 +3444,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB11_13: ; RV64I-NEXT: sll t5, a3, s0 ; RV64I-NEXT: srli t6, a5, 1 -; RV64I-NEXT: subw s0, t1, a4 +; RV64I-NEXT: subw s0, t0, a4 ; RV64I-NEXT: xori s0, s0, 63 ; RV64I-NEXT: srl t6, t6, s0 ; RV64I-NEXT: or t5, t5, t6 @@ -3452,7 +3452,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bgez t4, .LBB11_10 ; RV64I-NEXT: .LBB11_14: ; RV64I-NEXT: sra t3, a3, t3 -; RV64I-NEXT: bgeu a4, t1, .LBB11_16 +; RV64I-NEXT: bgeu a4, t0, .LBB11_16 ; RV64I-NEXT: .LBB11_15: ; RV64I-NEXT: slti t3, a7, 0 ; RV64I-NEXT: srl t4, a0, a4 @@ -3465,27 +3465,27 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bltz a7, .LBB11_20 ; RV64I-NEXT: .LBB11_18: ; RV64I-NEXT: sra a5, a3, a7 -; RV64I-NEXT: bgeu a4, t1, .LBB11_21 +; RV64I-NEXT: bgeu a4, t0, .LBB11_21 ; RV64I-NEXT: j .LBB11_22 ; RV64I-NEXT: .LBB11_19: ; RV64I-NEXT: mv a0, t3 ; RV64I-NEXT: bgez a7, .LBB11_18 ; RV64I-NEXT: .LBB11_20: ; RV64I-NEXT: srl a5, a5, a4 -; RV64I-NEXT: sll t0, t0, t2 -; RV64I-NEXT: or a5, a5, t0 -; RV64I-NEXT: bltu a4, t1, .LBB11_22 +; RV64I-NEXT: sll t1, t1, t2 +; RV64I-NEXT: or a5, a5, t1 +; RV64I-NEXT: bltu a4, t0, .LBB11_22 ; RV64I-NEXT: .LBB11_21: ; RV64I-NEXT: mv a5, a6 ; RV64I-NEXT: .LBB11_22: ; RV64I-NEXT: bltz a7, .LBB11_24 ; RV64I-NEXT: # %bb.23: ; RV64I-NEXT: mv a3, a6 -; RV64I-NEXT: bgeu a4, t1, .LBB11_25 +; RV64I-NEXT: bgeu a4, t0, .LBB11_25 ; RV64I-NEXT: j .LBB11_26 ; RV64I-NEXT: .LBB11_24: ; RV64I-NEXT: sra a3, a3, a4 -; RV64I-NEXT: bltu a4, t1, .LBB11_26 +; RV64I-NEXT: bltu a4, t0, .LBB11_26 ; RV64I-NEXT: .LBB11_25: ; RV64I-NEXT: mv a3, a6 ; RV64I-NEXT: .LBB11_26: @@ -3595,8 +3595,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli t5, t5, 24 -; RV32I-NEXT: or a4, t5, a6 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: or a3, t5, a3 ; RV32I-NEXT: lbu a4, 17(a0) ; RV32I-NEXT: lbu a6, 16(a0) ; RV32I-NEXT: lbu t5, 18(a0) @@ -3614,8 +3614,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, t5 ; RV32I-NEXT: slli t5, s4, 16 ; RV32I-NEXT: slli s4, s10, 24 -; RV32I-NEXT: or t5, s4, t5 ; RV32I-NEXT: or a4, t5, a4 +; RV32I-NEXT: or a4, s4, a4 ; RV32I-NEXT: lbu t5, 25(a0) ; RV32I-NEXT: lbu s10, 24(a0) ; RV32I-NEXT: lbu s11, 26(a0) @@ -3624,7 +3624,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or t5, t5, s10 ; RV32I-NEXT: slli s11, s11, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s11 +; RV32I-NEXT: or t5, s11, t5 ; RV32I-NEXT: or s11, a0, t5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu t5, 0(a1) @@ -3634,7 +3634,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, t5 ; RV32I-NEXT: slli s10, s10, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s10 +; RV32I-NEXT: or a0, s10, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: slli a1, a0, 3 ; RV32I-NEXT: addi t5, a1, -192 diff 
--git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll index 90af50f..b111eee 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -14,7 +14,7 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: srlw a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) @@ -36,7 +36,7 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) @@ -46,7 +46,7 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) @@ -75,7 +75,7 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: sllw a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) @@ -97,7 +97,7 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) @@ -107,7 +107,7 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) @@ -136,7 +136,7 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: sraw a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) @@ -158,7 +158,7 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) @@ -168,7 +168,7 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) @@ -197,42 +197,42 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; 
RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: lbu a3, 1(a1) ; RV64I-NEXT: lbu a4, 0(a1) ; RV64I-NEXT: lbu a5, 2(a1) -; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: srl a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 @@ -256,24 +256,24 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a3, 5(a0) ; RV32I-NEXT: lbu a4, 4(a0) ; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: lbu a3, 2(a1) +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 7(a0) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: addi a3, a1, -32 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: bltz a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a0, a4, a3 @@ -287,7 +287,7 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: xori a5, a1, 31 @@ -331,42 +331,42 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: lbu a3, 1(a1) ; 
RV64I-NEXT: lbu a4, 0(a1) ; RV64I-NEXT: lbu a5, 2(a1) -; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 4(a1) -; RV64I-NEXT: lbu a6, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sll a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 @@ -390,24 +390,24 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a5, a6, a5 -; RV32I-NEXT: lbu a3, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: lbu a3, 2(a1) +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: addi a3, a1, -32 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: bltz a3, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sll a0, a4, a3 @@ -421,7 +421,7 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a7 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: xori a5, a1, 31 @@ -465,42 +465,42 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 6(a0) ; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: lbu a3, 1(a1) ; RV64I-NEXT: lbu a4, 0(a1) ; RV64I-NEXT: lbu a5, 2(a1) -; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a1) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a5, 4(a1) -; 
RV64I-NEXT: lbu a6, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a4, a7, a4 ; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: sra a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: srli a1, a0, 48 @@ -523,28 +523,28 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a3, 5(a0) ; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a5, 6(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: lbu a5, 1(a1) -; RV32I-NEXT: or a6, a3, a4 -; RV32I-NEXT: lbu a3, 6(a0) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: lbu a4, 1(a1) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lbu a5, 0(a1) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: lbu a7, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: or a5, a4, a5 +; RV32I-NEXT: slli a4, a6, 24 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: slli a4, a4, 24 -; RV32I-NEXT: or a7, a4, a3 -; RV32I-NEXT: or a3, a1, a5 -; RV32I-NEXT: addi a5, a3, -32 -; RV32I-NEXT: or a1, a7, a6 +; RV32I-NEXT: or a5, a7, a5 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: addi a5, a1, -32 +; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: bltz a5, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra a0, a1, a5 +; RV32I-NEXT: sra a0, a3, a5 ; RV32I-NEXT: srai a1, a4, 31 ; RV32I-NEXT: j .LBB5_3 ; RV32I-NEXT: .LBB5_2: @@ -556,14 +556,14 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a4, a6, a4 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: srl a0, a0, a3 -; RV32I-NEXT: xori a4, a3, 31 -; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: xori a4, a1, 31 +; RV32I-NEXT: slli a5, a3, 1 ; RV32I-NEXT: sll a4, a5, a4 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: sra a1, a1, a3 +; RV32I-NEXT: sra a1, a3, a1 ; RV32I-NEXT: .LBB5_3: ; RV32I-NEXT: sb a1, 4(a2) ; RV32I-NEXT: srli a3, a1, 16 @@ -593,48 +593,48 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 9(a0) ; RV64I-NEXT: lbu a4, 8(a0) ; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a4, a4, a3 -; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: lbu a4, 11(a0) ; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 1(a1) -; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 ; 
RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a3, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: lbu a6, 5(a1) ; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a3, t0, a3 ; RV64I-NEXT: lbu t0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 3(a1) ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: slli a4, a4, 24 +; RV64I-NEXT: slli a7, a7, 24 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: slli a7, a3, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: addi a3, a1, -64 -; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: bltz a3, .LBB6_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: srl a0, a4, a3 @@ -643,25 +643,25 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a5, 1(a0) ; RV64I-NEXT: lbu a6, 0(a0) ; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: or a5, a7, a5 ; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: srl a0, a0, a1 ; RV64I-NEXT: xori a5, a1, 63 ; RV64I-NEXT: slli a6, a4, 1 @@ -735,8 +735,8 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 9(a0) ; RV32I-NEXT: lbu a5, 8(a0) ; RV32I-NEXT: lbu a6, 10(a0) @@ -745,8 +745,8 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -755,8 +755,8 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: or a4, a6, a0 +; RV32I-NEXT: or a4, a1, a4 ; RV32I-NEXT: addi t1, a4, -64 ; RV32I-NEXT: addi t2, a4, -96 ; RV32I-NEXT: slli a6, a3, 1 @@ -889,48 +889,48 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 1(a0) ; 
RV64I-NEXT: lbu a4, 0(a0) ; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a4, a4, a3 -; RV64I-NEXT: lbu a3, 5(a0) -; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: lbu a4, 3(a0) ; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 1(a1) -; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a6, 4(a0) +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu t0, 7(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a3, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: lbu a6, 5(a1) ; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a3, t0, a3 ; RV64I-NEXT: lbu t0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: lbu a7, 3(a1) ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: slli a4, a4, 24 +; RV64I-NEXT: slli a7, a7, 24 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: slli a7, a3, 32 ; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: addi a3, a1, -64 -; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: bltz a3, .LBB7_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sll a0, a4, a3 @@ -939,25 +939,25 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a5, 9(a0) ; RV64I-NEXT: lbu a6, 8(a0) ; RV64I-NEXT: lbu a7, 10(a0) -; RV64I-NEXT: lbu t0, 11(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a6, 13(a0) +; RV64I-NEXT: lbu t0, 11(a0) +; RV64I-NEXT: or a5, a7, a5 ; RV64I-NEXT: lbu a7, 12(a0) -; RV64I-NEXT: lbu t0, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t1, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) ; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a6, t1, a6 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: sll a0, a0, a1 ; RV64I-NEXT: xori a5, a1, 63 ; RV64I-NEXT: srli a6, a4, 1 @@ -1031,8 +1031,8 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) ; RV32I-NEXT: lbu a6, 6(a0) @@ -1041,8 +1041,8 @@ define 
void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -1051,8 +1051,8 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: or a4, a6, a0 +; RV32I-NEXT: or a4, a1, a4 ; RV32I-NEXT: addi t1, a4, -64 ; RV32I-NEXT: addi t2, a4, -96 ; RV32I-NEXT: srli a6, a3, 1 @@ -1185,48 +1185,48 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a3, 9(a0) ; RV64I-NEXT: lbu a4, 8(a0) ; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: or a3, a5, a3 ; RV64I-NEXT: lbu a4, 13(a0) ; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 1(a1) -; RV64I-NEXT: lbu a6, 0(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu t0, 3(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 5(a1) -; RV64I-NEXT: lbu a7, 4(a1) -; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a4, t0, a4 +; RV64I-NEXT: slli a5, a4, 32 +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a5, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: or a5, t1, a5 +; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: slli t0, t0, 24 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: slli a7, a4, 32 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: addi a5, a1, -64 -; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: bltz a5, .LBB8_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sra a0, a3, a5 @@ -1236,25 +1236,25 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 0(a0) ; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu a7, 3(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a7, 3(a0) +; RV64I-NEXT: or a4, 
a6, a4 ; RV64I-NEXT: lbu a6, 4(a0) -; RV64I-NEXT: lbu a7, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: lbu t0, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: srl a0, a0, a1 ; RV64I-NEXT: xori a4, a1, 63 ; RV64I-NEXT: slli a5, a3, 1 @@ -1326,8 +1326,8 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a4, t0, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, t0, a3 ; RV32I-NEXT: lbu a4, 9(a0) ; RV32I-NEXT: lbu a5, 8(a0) ; RV32I-NEXT: lbu a6, 10(a0) @@ -1336,8 +1336,8 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -1346,8 +1346,8 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: or a4, a4, a0 +; RV32I-NEXT: or a4, a6, a0 +; RV32I-NEXT: or a4, a1, a4 ; RV32I-NEXT: addi t3, a4, -64 ; RV32I-NEXT: addi t4, a4, -96 ; RV32I-NEXT: slli a6, a3, 1 @@ -1503,105 +1503,105 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a4, a6, a3 ; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a4, 12(a0) +; RV64I-NEXT: lbu a5, 12(a0) ; RV64I-NEXT: lbu a6, 14(a0) ; RV64I-NEXT: lbu a7, 15(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a5, a7, a3 ; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a6, 0(a0) ; RV64I-NEXT: lbu a7, 2(a0) ; RV64I-NEXT: lbu t0, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: or a7, t0, a3 ; RV64I-NEXT: lbu a3, 5(a0) -; RV64I-NEXT: lbu a4, 4(a0) +; RV64I-NEXT: lbu a6, 4(a0) ; RV64I-NEXT: lbu t0, 6(a0) ; RV64I-NEXT: lbu t1, 7(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) -; RV64I-NEXT: lbu t1, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu 
a4, 29(a0) -; RV64I-NEXT: lbu t1, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) -; RV64I-NEXT: lbu t3, 31(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: or a3, t0, a3 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu t0, 24(a0) +; RV64I-NEXT: lbu t2, 26(a0) +; RV64I-NEXT: or t1, t1, a3 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a3, a6, t0 ; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 17(a0) -; RV64I-NEXT: lbu t1, 16(a0) -; RV64I-NEXT: lbu t2, 18(a0) +; RV64I-NEXT: lbu a6, 29(a0) +; RV64I-NEXT: lbu t0, 27(a0) +; RV64I-NEXT: or a3, t2, a3 +; RV64I-NEXT: lbu t2, 28(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t3, 30(a0) +; RV64I-NEXT: lbu t4, 31(a0) +; RV64I-NEXT: or a6, a6, t2 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a6, t3, a6 +; RV64I-NEXT: or a6, t4, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: lbu a6, 17(a0) +; RV64I-NEXT: lbu t2, 16(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t2 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: lbu t2, 20(a0) +; RV64I-NEXT: or a6, t3, a6 ; RV64I-NEXT: lbu t3, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t2 +; RV64I-NEXT: lbu t2, 22(a0) +; RV64I-NEXT: lbu t4, 23(a0) +; RV64I-NEXT: slli a0, a5, 32 ; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 21(a0) -; RV64I-NEXT: lbu t2, 20(a0) -; RV64I-NEXT: lbu t3, 22(a0) -; RV64I-NEXT: lbu a0, 23(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: lbu a0, 1(a1) -; RV64I-NEXT: lbu t1, 0(a1) +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a5, t2, t0 +; RV64I-NEXT: or a5, t4, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a5, a5, t3 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t0 ; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: lbu t4, 4(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 +; RV64I-NEXT: lbu t0, 5(a1) +; RV64I-NEXT: lbu t3, 4(a1) +; RV64I-NEXT: or a6, t2, a6 +; RV64I-NEXT: lbu t2, 3(a1) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t3 ; RV64I-NEXT: lbu t3, 6(a1) ; RV64I-NEXT: lbu t4, 7(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 +; RV64I-NEXT: slli a1, t1, 32 +; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or t0, t3, t0 +; RV64I-NEXT: or t0, t4, t0 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a6, t0, a6 ; RV64I-NEXT: or a6, a6, t2 ; RV64I-NEXT: addiw t1, a6, -128 ; RV64I-NEXT: addi t2, a6, -192 @@ 
-1611,15 +1611,15 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srl t3, a3, t2 ; RV64I-NEXT: j .LBB9_3 ; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: srl t3, a4, t1 +; RV64I-NEXT: srl t3, a5, t1 ; RV64I-NEXT: xori t4, t1, 63 ; RV64I-NEXT: sll t4, t0, t4 ; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: .LBB9_3: -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: addi a7, a6, -64 -; RV64I-NEXT: xori a5, a6, 63 +; RV64I-NEXT: xori a4, a6, 63 ; RV64I-NEXT: bltz a7, .LBB9_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: srl s1, a0, a7 @@ -1627,11 +1627,11 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB9_5: ; RV64I-NEXT: srl t4, a1, a6 ; RV64I-NEXT: slli t5, a0, 1 -; RV64I-NEXT: sll t5, t5, a5 +; RV64I-NEXT: sll t5, t5, a4 ; RV64I-NEXT: or s1, t4, t5 ; RV64I-NEXT: .LBB9_6: ; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: sll t4, a4, t6 +; RV64I-NEXT: sll t4, a5, t6 ; RV64I-NEXT: li s0, 64 ; RV64I-NEXT: li t5, 128 ; RV64I-NEXT: sub s0, s0, a6 @@ -1642,7 +1642,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bgez s0, .LBB9_10 ; RV64I-NEXT: .LBB9_9: ; RV64I-NEXT: sll t3, a3, t6 -; RV64I-NEXT: srli t4, a4, 1 +; RV64I-NEXT: srli t4, a5, 1 ; RV64I-NEXT: subw t6, t5, a6 ; RV64I-NEXT: xori t6, t6, 63 ; RV64I-NEXT: srl t4, t4, t6 @@ -1681,9 +1681,9 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srl a4, a3, a7 ; RV64I-NEXT: j .LBB9_19 ; RV64I-NEXT: .LBB9_18: -; RV64I-NEXT: srl a4, a4, a6 -; RV64I-NEXT: sll a5, t0, a5 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: srl a5, a5, a6 +; RV64I-NEXT: sll a4, t0, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: .LBB9_19: ; RV64I-NEXT: sltiu a5, a6, 128 ; RV64I-NEXT: neg a5, a5 @@ -1796,8 +1796,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, s7, a3 ; RV32I-NEXT: lbu a4, 17(a0) ; RV32I-NEXT: lbu a5, 16(a0) ; RV32I-NEXT: lbu s8, 18(a0) @@ -1815,8 +1815,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s8 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a4, s8, a4 +; RV32I-NEXT: or a4, s10, a4 ; RV32I-NEXT: lbu a5, 25(a0) ; RV32I-NEXT: lbu s8, 24(a0) ; RV32I-NEXT: lbu s10, 26(a0) @@ -1825,7 +1825,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a5, s8 ; RV32I-NEXT: slli s10, s10, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 +; RV32I-NEXT: or a5, s10, a5 ; RV32I-NEXT: or ra, a0, a5 ; RV32I-NEXT: lbu a0, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) @@ -1835,7 +1835,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s8 +; RV32I-NEXT: or a0, s8, a0 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: addi a5, a0, -192 ; RV32I-NEXT: addi a1, a0, -224 @@ -2376,105 +2376,105 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 +; 
RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a4, a6, a3 ; RV64I-NEXT: lbu a3, 21(a0) -; RV64I-NEXT: lbu a4, 20(a0) +; RV64I-NEXT: lbu a5, 20(a0) ; RV64I-NEXT: lbu a6, 22(a0) ; RV64I-NEXT: lbu a7, 23(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a5, a7, a3 ; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) +; RV64I-NEXT: lbu a6, 24(a0) ; RV64I-NEXT: lbu a7, 26(a0) ; RV64I-NEXT: lbu t0, 27(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: or a7, t0, a3 ; RV64I-NEXT: lbu a3, 29(a0) -; RV64I-NEXT: lbu a4, 28(a0) +; RV64I-NEXT: lbu a6, 28(a0) ; RV64I-NEXT: lbu t0, 30(a0) ; RV64I-NEXT: lbu t1, 31(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a3, a6 ; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu t1, 2(a0) -; RV64I-NEXT: lbu t2, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 6(a0) -; RV64I-NEXT: lbu t3, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: or a3, t0, a3 +; RV64I-NEXT: lbu a6, 1(a0) +; RV64I-NEXT: lbu t0, 0(a0) +; RV64I-NEXT: lbu t2, 2(a0) +; RV64I-NEXT: or t1, t1, a3 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a3, a6, t0 ; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 9(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: lbu t2, 10(a0) +; RV64I-NEXT: lbu a6, 5(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: or a3, t2, a3 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t3, 6(a0) +; RV64I-NEXT: lbu t4, 7(a0) +; RV64I-NEXT: or a6, a6, t2 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a6, t3, a6 +; RV64I-NEXT: or a6, t4, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: lbu t2, 8(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t2 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: lbu t0, 13(a0) +; RV64I-NEXT: lbu t2, 12(a0) +; RV64I-NEXT: or a6, t3, a6 ; RV64I-NEXT: lbu t3, 11(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t2 +; RV64I-NEXT: lbu t2, 14(a0) +; RV64I-NEXT: lbu t4, 15(a0) +; RV64I-NEXT: slli a0, a5, 32 ; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 13(a0) -; RV64I-NEXT: lbu t2, 12(a0) -; RV64I-NEXT: lbu t3, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or 
a0, a0, t1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: lbu a0, 1(a1) -; RV64I-NEXT: lbu t1, 0(a1) +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a5, t2, t0 +; RV64I-NEXT: or a5, t4, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a5, a5, t3 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, t0 ; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: lbu t4, 4(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 +; RV64I-NEXT: lbu t0, 5(a1) +; RV64I-NEXT: lbu t3, 4(a1) +; RV64I-NEXT: or a6, t2, a6 +; RV64I-NEXT: lbu t2, 3(a1) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t3 ; RV64I-NEXT: lbu t3, 6(a1) ; RV64I-NEXT: lbu t4, 7(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 +; RV64I-NEXT: slli a1, t1, 32 +; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or t0, t3, t0 +; RV64I-NEXT: or t0, t4, t0 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a6, t0, a6 ; RV64I-NEXT: or a6, a6, t2 ; RV64I-NEXT: addiw t1, a6, -128 ; RV64I-NEXT: addi t2, a6, -192 @@ -2484,15 +2484,15 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sll t3, a3, t2 ; RV64I-NEXT: j .LBB10_3 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: sll t3, a4, t1 +; RV64I-NEXT: sll t3, a5, t1 ; RV64I-NEXT: xori t4, t1, 63 ; RV64I-NEXT: srl t4, t0, t4 ; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: .LBB10_3: -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: addi a7, a6, -64 -; RV64I-NEXT: xori a5, a6, 63 +; RV64I-NEXT: xori a4, a6, 63 ; RV64I-NEXT: bltz a7, .LBB10_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: sll s1, a0, a7 @@ -2500,11 +2500,11 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: .LBB10_5: ; RV64I-NEXT: sll t4, a1, a6 ; RV64I-NEXT: srli t5, a0, 1 -; RV64I-NEXT: srl t5, t5, a5 +; RV64I-NEXT: srl t5, t5, a4 ; RV64I-NEXT: or s1, t4, t5 ; RV64I-NEXT: .LBB10_6: ; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: srl t4, a4, t6 +; RV64I-NEXT: srl t4, a5, t6 ; RV64I-NEXT: li s0, 64 ; RV64I-NEXT: li t5, 128 ; RV64I-NEXT: sub s0, s0, a6 @@ -2515,7 +2515,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bgez s0, .LBB10_10 ; RV64I-NEXT: .LBB10_9: ; RV64I-NEXT: srl t3, a3, t6 -; RV64I-NEXT: slli t4, a4, 1 +; RV64I-NEXT: slli t4, a5, 1 ; RV64I-NEXT: subw t6, t5, a6 ; RV64I-NEXT: xori t6, t6, 63 ; RV64I-NEXT: sll t4, t4, t6 @@ -2554,9 +2554,9 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sll a4, a3, a7 ; RV64I-NEXT: j .LBB10_19 ; RV64I-NEXT: .LBB10_18: -; RV64I-NEXT: sll a4, a4, a6 -; RV64I-NEXT: srl a5, t0, a5 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: sll a5, a5, a6 +; RV64I-NEXT: srl a4, t0, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: .LBB10_19: ; RV64I-NEXT: sltiu a5, a6, 128 ; RV64I-NEXT: neg a5, a5 @@ -2669,8 +2669,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli s7, s7, 24 
-; RV32I-NEXT: or a4, s7, a5
-; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: or a3, s7, a3
 ; RV32I-NEXT: lbu a4, 13(a0)
 ; RV32I-NEXT: lbu a5, 12(a0)
 ; RV32I-NEXT: lbu s7, 14(a0)
@@ -2688,8 +2688,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or a4, a4, a5
 ; RV32I-NEXT: slli s7, s7, 16
 ; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: or a5, s10, s7
-; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: or a4, s7, a4
+; RV32I-NEXT: or a4, s10, a4
 ; RV32I-NEXT: lbu a5, 5(a0)
 ; RV32I-NEXT: lbu s7, 4(a0)
 ; RV32I-NEXT: lbu s10, 6(a0)
@@ -2698,7 +2698,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or a5, a5, s7
 ; RV32I-NEXT: slli s10, s10, 16
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, s10
+; RV32I-NEXT: or a5, s10, a5
 ; RV32I-NEXT: or s10, a0, a5
 ; RV32I-NEXT: lbu a0, 1(a1)
 ; RV32I-NEXT: lbu a5, 0(a1)
@@ -2708,7 +2708,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or a0, a0, a5
 ; RV32I-NEXT: slli s7, s7, 16
 ; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, s7
+; RV32I-NEXT: or a0, s7, a0
 ; RV32I-NEXT: or a0, a1, a0
 ; RV32I-NEXT: addi a5, a0, -192
 ; RV32I-NEXT: addi a1, a0, -224
@@ -3254,8 +3254,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT: or a3, a3, a4
 ; RV64I-NEXT: slli a5, a5, 16
 ; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a7, a4, a3
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: or a7, a6, a3
 ; RV64I-NEXT: lbu a3, 13(a0)
 ; RV64I-NEXT: lbu a4, 12(a0)
 ; RV64I-NEXT: lbu a5, 14(a0)
@@ -3264,95 +3264,95 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT: or a3, a3, a4
 ; RV64I-NEXT: slli a5, a5, 16
 ; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a4, a4, a3
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: or a5, a6, a3
 ; RV64I-NEXT: lbu a3, 1(a0)
-; RV64I-NEXT: lbu a5, 0(a0)
+; RV64I-NEXT: lbu a4, 0(a0)
 ; RV64I-NEXT: lbu a6, 2(a0)
 ; RV64I-NEXT: lbu t0, 3(a0)
 ; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a3, a3, a4
 ; RV64I-NEXT: slli a6, a6, 16
 ; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a5, t0, a6
-; RV64I-NEXT: or t1, a5, a3
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: or t1, t0, a3
 ; RV64I-NEXT: lbu a3, 5(a0)
-; RV64I-NEXT: lbu a5, 4(a0)
+; RV64I-NEXT: lbu a4, 4(a0)
 ; RV64I-NEXT: lbu a6, 6(a0)
 ; RV64I-NEXT: lbu t0, 7(a0)
 ; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a3, a3, a4
 ; RV64I-NEXT: slli a6, a6, 16
 ; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a5, t0, a6
-; RV64I-NEXT: or t0, a5, a3
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: or a4, t0, a3
 ; RV64I-NEXT: lbu a3, 25(a0)
-; RV64I-NEXT: lbu a5, 24(a0)
-; RV64I-NEXT: lbu a6, 26(a0)
+; RV64I-NEXT: lbu a6, 24(a0)
+; RV64I-NEXT: lbu t0, 26(a0)
 ; RV64I-NEXT: lbu t2, 27(a0)
 ; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: or a3, a3, a6
+; RV64I-NEXT: slli t0, t0, 16
 ; RV64I-NEXT: slli t2, t2, 24
-; RV64I-NEXT: or a5, t2, a6
-; RV64I-NEXT: or a3, a5, a3
-; RV64I-NEXT: lbu a5, 29(a0)
-; RV64I-NEXT: lbu a6, 28(a0)
-; RV64I-NEXT: lbu t2, 30(a0)
-; RV64I-NEXT: lbu t3, 31(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli t2, t2, 16
-; RV64I-NEXT: slli t3, t3, 24
-; RV64I-NEXT: or a6, t3, t2
-; RV64I-NEXT: or a6, a6, a5
-; RV64I-NEXT: slli a5, a6, 32
-; RV64I-NEXT: or a3, a5, a3
-; RV64I-NEXT: lbu a5, 17(a0)
-; RV64I-NEXT: lbu t2, 16(a0)
-; RV64I-NEXT: lbu t3, 18(a0)
-; RV64I-NEXT: lbu t4, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, t2
+; RV64I-NEXT: or a3, t0, a3
+; RV64I-NEXT: lbu a6, 29(a0)
+; RV64I-NEXT: lbu t0, 28(a0)
+; RV64I-NEXT: lbu t3, 30(a0)
+; RV64I-NEXT: lbu t4, 31(a0)
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a6, a6, t0
 ; RV64I-NEXT: slli t3, t3, 16
 ; RV64I-NEXT: slli t4, t4, 24
-; RV64I-NEXT: or t2, t4, t3
-; RV64I-NEXT: or a5, t2, a5
+; RV64I-NEXT: or a6, t3, a6
+; RV64I-NEXT: or a6, t4, a6
+; RV64I-NEXT: slli t0, a6, 32
+; RV64I-NEXT: or a3, t0, a3
+; RV64I-NEXT: lbu t0, 17(a0)
+; RV64I-NEXT: lbu t3, 16(a0)
+; RV64I-NEXT: lbu t4, 18(a0)
+; RV64I-NEXT: or a3, a3, t2
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or t0, t0, t3
+; RV64I-NEXT: slli t4, t4, 16
 ; RV64I-NEXT: lbu t2, 21(a0)
 ; RV64I-NEXT: lbu t3, 20(a0)
-; RV64I-NEXT: lbu t4, 22(a0)
-; RV64I-NEXT: lbu a0, 23(a0)
+; RV64I-NEXT: or t0, t4, t0
+; RV64I-NEXT: lbu t4, 19(a0)
 ; RV64I-NEXT: slli t2, t2, 8
 ; RV64I-NEXT: or t2, t2, t3
-; RV64I-NEXT: slli t4, t4, 16
-; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t4
-; RV64I-NEXT: or a0, a0, t2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a5, a0, a5
-; RV64I-NEXT: lbu a0, 1(a1)
+; RV64I-NEXT: lbu t3, 22(a0)
+; RV64I-NEXT: lbu t5, 23(a0)
+; RV64I-NEXT: slli a0, a5, 32
+; RV64I-NEXT: slli t4, t4, 24
+; RV64I-NEXT: slli t3, t3, 16
+; RV64I-NEXT: slli t5, t5, 24
+; RV64I-NEXT: or a5, t3, t2
+; RV64I-NEXT: or a5, t5, a5
+; RV64I-NEXT: slli a5, a5, 32
+; RV64I-NEXT: or a5, a5, t0
+; RV64I-NEXT: lbu t0, 1(a1)
 ; RV64I-NEXT: lbu t2, 0(a1)
 ; RV64I-NEXT: lbu t3, 2(a1)
-; RV64I-NEXT: lbu t4, 3(a1)
-; RV64I-NEXT: slli a0, a0, 8
-; RV64I-NEXT: or a0, a0, t2
+; RV64I-NEXT: or a5, a5, t4
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or t0, t0, t2
 ; RV64I-NEXT: slli t3, t3, 16
-; RV64I-NEXT: slli t4, t4, 24
 ; RV64I-NEXT: lbu t2, 5(a1)
-; RV64I-NEXT: lbu t5, 4(a1)
-; RV64I-NEXT: or t3, t4, t3
-; RV64I-NEXT: or t3, t3, a0
+; RV64I-NEXT: lbu t4, 4(a1)
+; RV64I-NEXT: or t0, t3, t0
+; RV64I-NEXT: lbu t3, 3(a1)
 ; RV64I-NEXT: slli t2, t2, 8
-; RV64I-NEXT: or t2, t2, t5
+; RV64I-NEXT: or t2, t2, t4
 ; RV64I-NEXT: lbu t4, 6(a1)
 ; RV64I-NEXT: lbu t5, 7(a1)
-; RV64I-NEXT: slli a0, a4, 32
-; RV64I-NEXT: slli a1, t0, 32
+; RV64I-NEXT: slli a1, a4, 32
+; RV64I-NEXT: slli t3, t3, 24
 ; RV64I-NEXT: slli t4, t4, 16
 ; RV64I-NEXT: slli t5, t5, 24
-; RV64I-NEXT: or a4, t5, t4
-; RV64I-NEXT: or a4, a4, t2
+; RV64I-NEXT: or a4, t4, t2
+; RV64I-NEXT: or a4, t5, a4
 ; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a4, a4, t0
 ; RV64I-NEXT: or a4, a4, t3
 ; RV64I-NEXT: addiw t3, a4, -128
 ; RV64I-NEXT: addi t4, a4, -192
@@ -3561,8 +3561,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or a3, a3, a4
 ; RV32I-NEXT: slli a6, a6, 16
 ; RV32I-NEXT: slli t5, t5, 24
-; RV32I-NEXT: or a4, t5, a6
-; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: or a3, t5, a3
 ; RV32I-NEXT: lbu a4, 17(a0)
 ; RV32I-NEXT: lbu a6, 16(a0)
 ; RV32I-NEXT: lbu t5, 18(a0)
@@ -3580,8 +3580,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or a4, a4, t5
 ; RV32I-NEXT: slli t5, s4, 16
 ; RV32I-NEXT: slli s4, s10, 24
-; RV32I-NEXT: or t5, s4, t5
 ; RV32I-NEXT: or a4, t5, a4
+; RV32I-NEXT: or a4, s4, a4
 ; RV32I-NEXT: lbu t5, 25(a0)
 ; RV32I-NEXT: lbu s10, 24(a0)
 ; RV32I-NEXT: lbu s11, 26(a0)
@@ -3590,7 +3590,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or t5, t5, s10
 ; RV32I-NEXT: slli s11, s11, 16
 ; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, s11
+; RV32I-NEXT: or t5, s11, t5
 ; RV32I-NEXT: or s11, a0, t5
 ; RV32I-NEXT: lbu a0, 1(a1)
 ; RV32I-NEXT: lbu t5, 0(a1)
@@ -3600,7 +3600,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT: or a0, a0, t5
 ; RV32I-NEXT: slli s10, s10, 16
 ; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, s10
+; RV32I-NEXT: or a0, s10, a0
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: addi t5, a1, -192
 ; RV32I-NEXT: addi a0, a1, -224
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index ec58aab..362229b 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -187,10 +187,10 @@ entry:
 define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32-LABEL: saddo1.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a5, a1, a3
 ; RV32-NEXT: add a2, a0, a2
 ; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a5, a5, a0
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: add a5, a1, a0
 ; RV32-NEXT: xor a0, a1, a5
 ; RV32-NEXT: xor a1, a1, a3
 ; RV32-NEXT: not a1, a1
@@ -211,10 +211,10 @@ define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, ptr %res) {
 ;
 ; RV32ZBA-LABEL: saddo1.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a5, a1, a3
 ; RV32ZBA-NEXT: add a2, a0, a2
 ; RV32ZBA-NEXT: sltu a0, a2, a0
-; RV32ZBA-NEXT: add a5, a5, a0
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: add a5, a1, a0
 ; RV32ZBA-NEXT: xor a0, a1, a5
 ; RV32ZBA-NEXT: xor a1, a1, a3
 ; RV32ZBA-NEXT: not a1, a1
@@ -449,10 +449,10 @@ entry:
 define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32-LABEL: uaddo.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a3, a1, a3
 ; RV32-NEXT: add a2, a0, a2
 ; RV32-NEXT: sltu a0, a2, a0
 ; RV32-NEXT: add a3, a3, a0
+; RV32-NEXT: add a3, a1, a3
 ; RV32-NEXT: beq a3, a1, .LBB10_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a0, a3, a1
@@ -470,10 +470,10 @@ define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, ptr %res) {
 ;
 ; RV32ZBA-LABEL: uaddo.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a3, a1, a3
 ; RV32ZBA-NEXT: add a2, a0, a2
 ; RV32ZBA-NEXT: sltu a0, a2, a0
 ; RV32ZBA-NEXT: add a3, a3, a0
+; RV32ZBA-NEXT: add a3, a1, a3
 ; RV32ZBA-NEXT: beq a3, a1, .LBB10_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a0, a3, a1
@@ -634,8 +634,8 @@ define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32-LABEL: ssubo.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a5, a0, a2
-; RV32-NEXT: sub a6, a1, a3
-; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: add a5, a3, a5
+; RV32-NEXT: sub a5, a1, a5
 ; RV32-NEXT: xor a6, a1, a5
 ; RV32-NEXT: xor a1, a1, a3
 ; RV32-NEXT: and a1, a1, a6
@@ -658,8 +658,8 @@ define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32ZBA-LABEL: ssubo.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a5, a0, a2
-; RV32ZBA-NEXT: sub a6, a1, a3
-; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: add a5, a3, a5
+; RV32ZBA-NEXT: sub a5, a1, a5
 ; RV32ZBA-NEXT: xor a6, a1, a5
 ; RV32ZBA-NEXT: xor a1, a1, a3
 ; RV32ZBA-NEXT: and a1, a1, a6
@@ -806,8 +806,8 @@ define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32-LABEL: usubo.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a5, a0, a2
+; RV32-NEXT: add a3, a3, a5
 ; RV32-NEXT: sub a3, a1, a3
-; RV32-NEXT: sub a3, a3, a5
 ; RV32-NEXT: sub a2, a0, a2
 ; RV32-NEXT: beq a3, a1, .LBB18_2
 ; RV32-NEXT: # %bb.1: # %entry
@@ -830,8 +830,8 @@ define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32ZBA-LABEL: usubo.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a5, a0, a2
+; RV32ZBA-NEXT: add a3, a3, a5
 ; RV32ZBA-NEXT: sub a3, a1, a3
-; RV32ZBA-NEXT: sub a3, a3, a5
 ; RV32ZBA-NEXT: sub a2, a0, a2
 ; RV32ZBA-NEXT: beq a3, a1, .LBB18_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
@@ -987,21 +987,21 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32-NEXT: sltu t0, t1, t0
 ; RV32-NEXT: sltu a6, a7, a6
 ; RV32-NEXT: mulhu a7, a1, a3
-; RV32-NEXT: add a6, a7, a6
 ; RV32-NEXT: add a6, a6, t0
-; RV32-NEXT: mulhu a7, a2, t2
-; RV32-NEXT: add a7, a7, t3
+; RV32-NEXT: mulhu t0, a2, t2
 ; RV32-NEXT: mul a3, a3, t2
-; RV32-NEXT: add a3, a7, a3
+; RV32-NEXT: add a3, t3, a3
+; RV32-NEXT: add a3, t0, a3
 ; RV32-NEXT: mul a1, t4, a1
-; RV32-NEXT: mulhu a7, t4, a0
-; RV32-NEXT: add a1, a7, a1
+; RV32-NEXT: mulhu t0, t4, a0
 ; RV32-NEXT: add a1, a1, t5
 ; RV32-NEXT: add a1, a1, a3
 ; RV32-NEXT: sltu a3, t6, t5
 ; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: add a1, t0, a1
 ; RV32-NEXT: add a1, a6, a1
 ; RV32-NEXT: add a1, a1, s1
+; RV32-NEXT: add a1, a7, a1
 ; RV32-NEXT: srai a3, a5, 31
 ; RV32-NEXT: xor a1, a1, a3
 ; RV32-NEXT: xor a3, s0, a3
@@ -1058,21 +1058,21 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32ZBA-NEXT: sltu t0, t1, t0
 ; RV32ZBA-NEXT: sltu a6, a7, a6
 ; RV32ZBA-NEXT: mulhu a7, a1, a3
-; RV32ZBA-NEXT: add a6, a7, a6
 ; RV32ZBA-NEXT: add a6, a6, t0
-; RV32ZBA-NEXT: mulhu a7, a2, t2
-; RV32ZBA-NEXT: add a7, a7, t3
+; RV32ZBA-NEXT: mulhu t0, a2, t2
 ; RV32ZBA-NEXT: mul a3, a3, t2
-; RV32ZBA-NEXT: add a3, a7, a3
+; RV32ZBA-NEXT: add a3, t3, a3
+; RV32ZBA-NEXT: add a3, t0, a3
 ; RV32ZBA-NEXT: mul a1, t4, a1
-; RV32ZBA-NEXT: mulhu a7, t4, a0
-; RV32ZBA-NEXT: add a1, a7, a1
+; RV32ZBA-NEXT: mulhu t0, t4, a0
 ; RV32ZBA-NEXT: add a1, a1, t5
 ; RV32ZBA-NEXT: add a1, a1, a3
 ; RV32ZBA-NEXT: sltu a3, t6, t5
 ; RV32ZBA-NEXT: add a1, a1, a3
+; RV32ZBA-NEXT: add a1, t0, a1
 ; RV32ZBA-NEXT: add a1, a6, a1
 ; RV32ZBA-NEXT: add a1, a1, s1
+; RV32ZBA-NEXT: add a1, a7, a1
 ; RV32ZBA-NEXT: srai a3, a5, 31
 ; RV32ZBA-NEXT: xor a1, a1, a3
 ; RV32ZBA-NEXT: xor a3, s0, a3
@@ -1335,20 +1335,20 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: mul a5, a3, a0
 ; RV32-NEXT: mul a6, a1, a2
-; RV32-NEXT: add a5, a6, a5
-; RV32-NEXT: mulhu a6, a0, a2
-; RV32-NEXT: add a5, a6, a5
-; RV32-NEXT: sltu a6, a5, a6
+; RV32-NEXT: mulhu a7, a0, a2
+; RV32-NEXT: add a5, a7, a5
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sltu a6, a5, a7
 ; RV32-NEXT: snez a7, a3
 ; RV32-NEXT: snez t0, a1
 ; RV32-NEXT: and a7, t0, a7
 ; RV32-NEXT: mulhu a1, a1, a2
 ; RV32-NEXT: snez a1, a1
-; RV32-NEXT: or a1, a7, a1
 ; RV32-NEXT: mulhu a3, a3, a0
 ; RV32-NEXT: snez a3, a3
 ; RV32-NEXT: or a1, a1, a3
 ; RV32-NEXT: or a1, a1, a6
+; RV32-NEXT: or a1, a7, a1
 ; RV32-NEXT: mul a0, a0, a2
 ; RV32-NEXT: sw a0, 0(a4)
 ; RV32-NEXT: sw a5, 4(a4)
@@ -1368,20 +1368,20 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: mul a5, a3, a0
 ; RV32ZBA-NEXT: mul a6, a1, a2
-; RV32ZBA-NEXT: add a5, a6, a5
-; RV32ZBA-NEXT: mulhu a6, a0, a2
-; RV32ZBA-NEXT: add a5, a6, a5
-; RV32ZBA-NEXT: sltu a6, a5, a6
+; RV32ZBA-NEXT: mulhu a7, a0, a2
+; RV32ZBA-NEXT: add a5, a7, a5
+; RV32ZBA-NEXT: add a5, a5, a6
+; RV32ZBA-NEXT: sltu a6, a5, a7
 ; RV32ZBA-NEXT: snez a7, a3
 ; RV32ZBA-NEXT: snez t0, a1
 ; RV32ZBA-NEXT: and a7, t0, a7
 ; RV32ZBA-NEXT: mulhu a1, a1, a2
 ; RV32ZBA-NEXT: snez a1, a1
-; RV32ZBA-NEXT: or a1, a7, a1
 ; RV32ZBA-NEXT: mulhu a3, a3, a0
 ; RV32ZBA-NEXT: snez a3, a3
 ; RV32ZBA-NEXT: or a1, a1, a3
 ; RV32ZBA-NEXT: or a1, a1, a6
+; RV32ZBA-NEXT: or a1, a7, a1
 ; RV32ZBA-NEXT: mul a0, a0, a2
 ; RV32ZBA-NEXT: sw a0, 0(a4)
 ; RV32ZBA-NEXT: sw a5, 4(a4)
@@ -1561,10 +1561,10 @@ entry:
 define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: saddo.select.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a4, a1, a3
-; RV32-NEXT: add a5, a0, a2
-; RV32-NEXT: sltu a5, a5, a0
-; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: add a4, a0, a2
+; RV32-NEXT: sltu a4, a4, a0
+; RV32-NEXT: add a4, a3, a4
+; RV32-NEXT: add a4, a1, a4
 ; RV32-NEXT: xor a4, a1, a4
 ; RV32-NEXT: xor a5, a1, a3
 ; RV32-NEXT: not a5, a5
@@ -1589,10 +1589,10 @@ define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
 ;
 ; RV32ZBA-LABEL: saddo.select.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a4, a1, a3
-; RV32ZBA-NEXT: add a5, a0, a2
-; RV32ZBA-NEXT: sltu a5, a5, a0
-; RV32ZBA-NEXT: add a4, a4, a5
+; RV32ZBA-NEXT: add a4, a0, a2
+; RV32ZBA-NEXT: sltu a4, a4, a0
+; RV32ZBA-NEXT: add a4, a3, a4
+; RV32ZBA-NEXT: add a4, a1, a4
 ; RV32ZBA-NEXT: xor a4, a1, a4
 ; RV32ZBA-NEXT: xor a5, a1, a3
 ; RV32ZBA-NEXT: not a5, a5
@@ -1624,10 +1624,10 @@ entry:
 define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: saddo.not.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a4, a1, a3
 ; RV32-NEXT: add a2, a0, a2
 ; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a0, a4, a0
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: add a0, a1, a0
 ; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: xor a1, a1, a3
 ; RV32-NEXT: not a1, a1
@@ -1647,10 +1647,10 @@ define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
 ;
 ; RV32ZBA-LABEL: saddo.not.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a4, a1, a3
 ; RV32ZBA-NEXT: add a2, a0, a2
 ; RV32ZBA-NEXT: sltu a0, a2, a0
-; RV32ZBA-NEXT: add a0, a4, a0
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: add a0, a1, a0
 ; RV32ZBA-NEXT: xor a0, a1, a0
 ; RV32ZBA-NEXT: xor a1, a1, a3
 ; RV32ZBA-NEXT: not a1, a1
@@ -1755,10 +1755,10 @@ entry:
 define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: uaddo.select.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a5, a1, a3
 ; RV32-NEXT: add a4, a0, a2
 ; RV32-NEXT: sltu a4, a4, a0
-; RV32-NEXT: add a5, a5, a4
+; RV32-NEXT: add a5, a3, a4
+; RV32-NEXT: add a5, a1, a5
 ; RV32-NEXT: bne a5, a1, .LBB34_3
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: beqz a4, .LBB34_4
@@ -1783,10 +1783,10 @@ define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
 ;
 ; RV32ZBA-LABEL: uaddo.select.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a5, a1, a3
 ; RV32ZBA-NEXT: add a4, a0, a2
 ; RV32ZBA-NEXT: sltu a4, a4, a0
-; RV32ZBA-NEXT: add a5, a5, a4
+; RV32ZBA-NEXT: add a5, a3, a4
+; RV32ZBA-NEXT: add a5, a1, a5
 ; RV32ZBA-NEXT: bne a5, a1, .LBB34_3
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: beqz a4, .LBB34_4
@@ -1818,10 +1818,10 @@ entry:
 define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: uaddo.not.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a3, a1, a3
 ; RV32-NEXT: add a2, a0, a2
 ; RV32-NEXT: sltu a0, a2, a0
 ; RV32-NEXT: add a2, a3, a0
+; RV32-NEXT: add a2, a1, a2
 ; RV32-NEXT: beq a2, a1, .LBB35_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a0, a2, a1
@@ -1838,10 +1838,10 @@ define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
 ;
 ; RV32ZBA-LABEL: uaddo.not.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a3, a1, a3
 ; RV32ZBA-NEXT: add a2, a0, a2
 ; RV32ZBA-NEXT: sltu a0, a2, a0
 ; RV32ZBA-NEXT: add a2, a3, a0
+; RV32ZBA-NEXT: add a2, a1, a2
 ; RV32ZBA-NEXT: beq a2, a1, .LBB35_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a0, a2, a1
@@ -1956,11 +1956,11 @@ define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: ssubo.select.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a4, a0, a2
-; RV32-NEXT: sub a5, a1, a3
-; RV32-NEXT: sub a5, a5, a4
-; RV32-NEXT: xor a5, a1, a5
-; RV32-NEXT: xor a4, a1, a3
-; RV32-NEXT: and a4, a4, a5
+; RV32-NEXT: add a4, a3, a4
+; RV32-NEXT: sub a4, a1, a4
+; RV32-NEXT: xor a4, a1, a4
+; RV32-NEXT: xor a5, a1, a3
+; RV32-NEXT: and a4, a5, a4
 ; RV32-NEXT: bltz a4, .LBB38_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: mv a0, a2
@@ -1982,11 +1982,11 @@ define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-LABEL: ssubo.select.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a4, a0, a2
-; RV32ZBA-NEXT: sub a5, a1, a3
-; RV32ZBA-NEXT: sub a5, a5, a4
-; RV32ZBA-NEXT: xor a5, a1, a5
-; RV32ZBA-NEXT: xor a4, a1, a3
-; RV32ZBA-NEXT: and a4, a4, a5
+; RV32ZBA-NEXT: add a4, a3, a4
+; RV32ZBA-NEXT: sub a4, a1, a4
+; RV32ZBA-NEXT: xor a4, a1, a4
+; RV32ZBA-NEXT: xor a5, a1, a3
+; RV32ZBA-NEXT: and a4, a5, a4
 ; RV32ZBA-NEXT: bltz a4, .LBB38_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: mv a0, a2
@@ -2015,12 +2015,12 @@ define i1 @ssub.not.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: ssub.not.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a0, a0, a2
-; RV32-NEXT: sub a2, a1, a3
-; RV32-NEXT: sub a2, a2, a0
-; RV32-NEXT: xor a2, a1, a2
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: xor a1, a1, a3
-; RV32-NEXT: and a1, a1, a2
-; RV32-NEXT: slti a0, a1, 0
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: slti a0, a0, 0
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: ret
 ;
@@ -2036,12 +2036,12 @@ define i1 @ssub.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-LABEL: ssub.not.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a0, a0, a2
-; RV32ZBA-NEXT: sub a2, a1, a3
-; RV32ZBA-NEXT: sub a2, a2, a0
-; RV32ZBA-NEXT: xor a2, a1, a2
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: sub a0, a1, a0
+; RV32ZBA-NEXT: xor a0, a1, a0
 ; RV32ZBA-NEXT: xor a1, a1, a3
-; RV32ZBA-NEXT: and a1, a1, a2
-; RV32ZBA-NEXT: slti a0, a1, 0
+; RV32ZBA-NEXT: and a0, a1, a0
+; RV32ZBA-NEXT: slti a0, a0, 0
 ; RV32ZBA-NEXT: xori a0, a0, 1
 ; RV32ZBA-NEXT: ret
 ;
@@ -2142,8 +2142,8 @@ define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: usubo.select.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a4, a0, a2
-; RV32-NEXT: sub a5, a1, a3
-; RV32-NEXT: sub a4, a5, a4
+; RV32-NEXT: add a4, a3, a4
+; RV32-NEXT: sub a4, a1, a4
 ; RV32-NEXT: beq a4, a1, .LBB42_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a4, a1, a4
@@ -2171,8 +2171,8 @@ define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-LABEL: usubo.select.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a4, a0, a2
-; RV32ZBA-NEXT: sub a5, a1, a3
-; RV32ZBA-NEXT: sub a4, a5, a4
+; RV32ZBA-NEXT: add a4, a3, a4
+; RV32ZBA-NEXT: sub a4, a1, a4
 ; RV32ZBA-NEXT: beq a4, a1, .LBB42_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a4, a1, a4
@@ -2207,8 +2207,8 @@ define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: usubo.not.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a4, a0, a2
+; RV32-NEXT: add a3, a3, a4
 ; RV32-NEXT: sub a3, a1, a3
-; RV32-NEXT: sub a3, a3, a4
 ; RV32-NEXT: beq a3, a1, .LBB43_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a0, a1, a3
@@ -2230,8 +2230,8 @@ define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-LABEL: usubo.not.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a4, a0, a2
+; RV32ZBA-NEXT: add a3, a3, a4
 ; RV32ZBA-NEXT: sub a3, a1, a3
-; RV32ZBA-NEXT: sub a3, a3, a4
 ; RV32ZBA-NEXT: beq a3, a1, .LBB43_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a0, a1, a3
@@ -2377,21 +2377,21 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT: sltu a7, t0, a7
 ; RV32-NEXT: sltu a5, a6, a5
 ; RV32-NEXT: mulhu a6, a1, a3
-; RV32-NEXT: add a5, a6, a5
 ; RV32-NEXT: add a5, a5, a7
-; RV32-NEXT: mulhu a6, a2, t1
-; RV32-NEXT: add a6, a6, t2
-; RV32-NEXT: mul a7, a3, t1
-; RV32-NEXT: add a6, a6, a7
-; RV32-NEXT: mul a7, t3, a1
-; RV32-NEXT: mulhu t0, t3, a0
+; RV32-NEXT: mulhu a7, a2, t1
+; RV32-NEXT: mul t0, a3, t1
+; RV32-NEXT: add t0, t2, t0
+; RV32-NEXT: add a7, a7, t0
+; RV32-NEXT: mul t0, t3, a1
+; RV32-NEXT: mulhu t1, t3, a0
+; RV32-NEXT: add t0, t0, t4
 ; RV32-NEXT: add a7, t0, a7
-; RV32-NEXT: add a7, a7, t4
-; RV32-NEXT: add a6, a7, a6
-; RV32-NEXT: sltu a7, t5, t4
-; RV32-NEXT: add a6, a6, a7
-; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: sltu t0, t5, t4
+; RV32-NEXT: add a7, a7, t0
+; RV32-NEXT: add a7, t1, a7
+; RV32-NEXT: add a5, a5, a7
 ; RV32-NEXT: add a5, a5, s0
+; RV32-NEXT: add a5, a6, a5
 ; RV32-NEXT: srai a4, a4, 31
 ; RV32-NEXT: xor a5, a5, a4
 ; RV32-NEXT: xor a4, t6, a4
@@ -2446,21 +2446,21 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT: sltu a7, t0, a7
 ; RV32ZBA-NEXT: sltu a5, a6, a5
 ; RV32ZBA-NEXT: mulhu a6, a1, a3
-; RV32ZBA-NEXT: add a5, a6, a5
 ; RV32ZBA-NEXT: add a5, a5, a7
-; RV32ZBA-NEXT: mulhu a6, a2, t1
-; RV32ZBA-NEXT: add a6, a6, t2
-; RV32ZBA-NEXT: mul a7, a3, t1
-; RV32ZBA-NEXT: add a6, a6, a7
-; RV32ZBA-NEXT: mul a7, t3, a1
-; RV32ZBA-NEXT: mulhu t0, t3, a0
+; RV32ZBA-NEXT: mulhu a7, a2, t1
+; RV32ZBA-NEXT: mul t0, a3, t1
+; RV32ZBA-NEXT: add t0, t2, t0
+; RV32ZBA-NEXT: add a7, a7, t0
+; RV32ZBA-NEXT: mul t0, t3, a1
+; RV32ZBA-NEXT: mulhu t1, t3, a0
+; RV32ZBA-NEXT: add t0, t0, t4
 ; RV32ZBA-NEXT: add a7, t0, a7
-; RV32ZBA-NEXT: add a7, a7, t4
-; RV32ZBA-NEXT: add a6, a7, a6
-; RV32ZBA-NEXT: sltu a7, t5, t4
-; RV32ZBA-NEXT: add a6, a6, a7
-; RV32ZBA-NEXT: add a5, a5, a6
+; RV32ZBA-NEXT: sltu t0, t5, t4
+; RV32ZBA-NEXT: add a7, a7, t0
+; RV32ZBA-NEXT: add a7, t1, a7
+; RV32ZBA-NEXT: add a5, a5, a7
 ; RV32ZBA-NEXT: add a5, a5, s0
+; RV32ZBA-NEXT: add a5, a6, a5
 ; RV32ZBA-NEXT: srai a4, a4, 31
 ; RV32ZBA-NEXT: xor a5, a5, a4
 ; RV32ZBA-NEXT: xor a4, t6, a4
@@ -2522,21 +2522,21 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT: sltu a7, t0, a7
 ; RV32-NEXT: sltu a5, a6, a5
 ; RV32-NEXT: mulhu a6, a1, a3
-; RV32-NEXT: add a5, a6, a5
 ; RV32-NEXT: add a5, a5, a7
 ; RV32-NEXT: mulhu a2, a2, t1
-; RV32-NEXT: add a2, a2, t2
 ; RV32-NEXT: mul a3, a3, t1
+; RV32-NEXT: add a3, t2, a3
 ; RV32-NEXT: add a2, a2, a3
 ; RV32-NEXT: mul a1, t3, a1
 ; RV32-NEXT: mulhu a0, t3, a0
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: add a0, a0, t4
-; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: sltu a1, t5, t4
+; RV32-NEXT: add a1, a1, t4
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sltu a2, t5, t4
+; RV32-NEXT: add a1, a1, a2
 ; RV32-NEXT: add a0, a0, a1
 ; RV32-NEXT: add a0, a5, a0
 ; RV32-NEXT: add a0, a0, s0
+; RV32-NEXT: add a0, a6, a0
 ; RV32-NEXT: srai a4, a4, 31
 ; RV32-NEXT: xor a0, a0, a4
 ; RV32-NEXT: xor a1, t6, a4
@@ -2585,21 +2585,21 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT: sltu a7, t0, a7
 ; RV32ZBA-NEXT: sltu a5, a6, a5
 ; RV32ZBA-NEXT: mulhu a6, a1, a3
-; RV32ZBA-NEXT: add a5, a6, a5
 ; RV32ZBA-NEXT: add a5, a5, a7
 ; RV32ZBA-NEXT: mulhu a2, a2, t1
-; RV32ZBA-NEXT: add a2, a2, t2
 ; RV32ZBA-NEXT: mul a3, a3, t1
+; RV32ZBA-NEXT: add a3, t2, a3
 ; RV32ZBA-NEXT: add a2, a2, a3
 ; RV32ZBA-NEXT: mul a1, t3, a1
 ; RV32ZBA-NEXT: mulhu a0, t3, a0
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: add a0, a0, t4
-; RV32ZBA-NEXT: add a0, a0, a2
-; RV32ZBA-NEXT: sltu a1, t5, t4
+; RV32ZBA-NEXT: add a1, a1, t4
+; RV32ZBA-NEXT: add a1, a1, a2
+; RV32ZBA-NEXT: sltu a2, t5, t4
+; RV32ZBA-NEXT: add a1, a1, a2
 ; RV32ZBA-NEXT: add a0, a0, a1
 ; RV32ZBA-NEXT: add a0, a5, a0
 ; RV32ZBA-NEXT: add a0, a0, s0
+; RV32ZBA-NEXT: add a0, a6, a0
 ; RV32ZBA-NEXT: srai a4, a4, 31
 ; RV32ZBA-NEXT: xor a0, a0, a4
 ; RV32ZBA-NEXT: xor a1, t6, a4
@@ -2715,19 +2715,19 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: mul a4, a3, a0
 ; RV32-NEXT: mul a5, a1, a2
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: mulhu a5, a0, a2
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: sltu a4, a4, a5
+; RV32-NEXT: mulhu a6, a0, a2
+; RV32-NEXT: add a4, a6, a4
+; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: sltu a4, a4, a6
 ; RV32-NEXT: snez a5, a3
 ; RV32-NEXT: snez a6, a1
 ; RV32-NEXT: and a5, a6, a5
 ; RV32-NEXT: mulhu a6, a1, a2
 ; RV32-NEXT: snez a6, a6
-; RV32-NEXT: or a5, a5, a6
-; RV32-NEXT: mulhu a6, a3, a0
-; RV32-NEXT: snez a6, a6
-; RV32-NEXT: or a5, a5, a6
+; RV32-NEXT: mulhu a7, a3, a0
+; RV32-NEXT: snez a7, a7
+; RV32-NEXT: or a6, a6, a7
+; RV32-NEXT: or a4, a6, a4
 ; RV32-NEXT: or a4, a5, a4
 ; RV32-NEXT: bnez a4, .LBB50_2
 ; RV32-NEXT: # %bb.1: # %entry
@@ -2749,19 +2749,19 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: mul a4, a3, a0
 ; RV32ZBA-NEXT: mul a5, a1, a2
-; RV32ZBA-NEXT: add a4, a5, a4
-; RV32ZBA-NEXT: mulhu a5, a0, a2
-; RV32ZBA-NEXT: add a4, a5, a4
-; RV32ZBA-NEXT: sltu a4, a4, a5
+; RV32ZBA-NEXT: mulhu a6, a0, a2
+; RV32ZBA-NEXT: add a4, a6, a4
+; RV32ZBA-NEXT: add a4, a4, a5
+; RV32ZBA-NEXT: sltu a4, a4, a6
 ; RV32ZBA-NEXT: snez a5, a3
 ; RV32ZBA-NEXT: snez a6, a1
 ; RV32ZBA-NEXT: and a5, a6, a5
 ; RV32ZBA-NEXT: mulhu a6, a1, a2
 ; RV32ZBA-NEXT: snez a6, a6
-; RV32ZBA-NEXT: or a5, a5, a6
-; RV32ZBA-NEXT: mulhu a6, a3, a0
-; RV32ZBA-NEXT: snez a6, a6
-; RV32ZBA-NEXT: or a5, a5, a6
+; RV32ZBA-NEXT: mulhu a7, a3, a0
+; RV32ZBA-NEXT: snez a7, a7
+; RV32ZBA-NEXT: or a6, a6, a7
+; RV32ZBA-NEXT: or a4, a6, a4
 ; RV32ZBA-NEXT: or a4, a5, a4
 ; RV32ZBA-NEXT: bnez a4, .LBB50_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
@@ -2790,20 +2790,20 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: mul a4, a3, a0
 ; RV32-NEXT: mul a5, a1, a2
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: mulhu a5, a0, a2
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: sltu a4, a4, a5
+; RV32-NEXT: mulhu a6, a0, a2
+; RV32-NEXT: add a4, a6, a4
+; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: sltu a4, a4, a6
 ; RV32-NEXT: snez a5, a3
 ; RV32-NEXT: snez a6, a1
 ; RV32-NEXT: and a5, a6, a5
 ; RV32-NEXT: mulhu a1, a1, a2
 ; RV32-NEXT: snez a1, a1
-; RV32-NEXT: or a1, a5, a1
 ; RV32-NEXT: mulhu a0, a3, a0
 ; RV32-NEXT: snez a0, a0
 ; RV32-NEXT: or a0, a1, a0
 ; RV32-NEXT: or a0, a0, a4
+; RV32-NEXT: or a0, a5, a0
 ; RV32-NEXT: xori a0, a0, 1
 ; RV32-NEXT: ret
 ;
@@ -2817,20 +2817,20 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: mul a4, a3, a0
 ; RV32ZBA-NEXT: mul a5, a1, a2
-; RV32ZBA-NEXT: add a4, a5, a4
-; RV32ZBA-NEXT: mulhu a5, a0, a2
-; RV32ZBA-NEXT: add a4, a5, a4
-; RV32ZBA-NEXT: sltu a4, a4, a5
+; RV32ZBA-NEXT: mulhu a6, a0, a2
+; RV32ZBA-NEXT: add a4, a6, a4
+; RV32ZBA-NEXT: add a4, a4, a5
+; RV32ZBA-NEXT: sltu a4, a4, a6
 ; RV32ZBA-NEXT: snez a5, a3
 ; RV32ZBA-NEXT: snez a6, a1
 ; RV32ZBA-NEXT: and a5, a6, a5
 ; RV32ZBA-NEXT: mulhu a1, a1, a2
 ; RV32ZBA-NEXT: snez a1, a1
-; RV32ZBA-NEXT: or a1, a5, a1
 ; RV32ZBA-NEXT: mulhu a0, a3, a0
 ; RV32ZBA-NEXT: snez a0, a0
 ; RV32ZBA-NEXT: or a0, a1, a0
 ; RV32ZBA-NEXT: or a0, a0, a4
+; RV32ZBA-NEXT: or a0, a5, a0
 ; RV32ZBA-NEXT: xori a0, a0, 1
 ; RV32ZBA-NEXT: ret
 ;
@@ -2916,10 +2916,10 @@ continue:
 define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: saddo.br.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a4, a1, a3
 ; RV32-NEXT: add a2, a0, a2
 ; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a0, a4, a0
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: add a0, a1, a0
 ; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: xor a1, a1, a3
 ; RV32-NEXT: not a1, a1
@@ -2947,10 +2947,10 @@ define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
 ;
 ; RV32ZBA-LABEL: saddo.br.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a4, a1, a3
 ; RV32ZBA-NEXT: add a2, a0, a2
 ; RV32ZBA-NEXT: sltu a0, a2, a0
-; RV32ZBA-NEXT: add a0, a4, a0
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: add a0, a1, a0
 ; RV32ZBA-NEXT: xor a0, a1, a0
 ; RV32ZBA-NEXT: xor a1, a1, a3
 ; RV32ZBA-NEXT: not a1, a1
@@ -3050,10 +3050,10 @@ continue:
 define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: uaddo.br.i64:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a3, a1, a3
 ; RV32-NEXT: add a2, a0, a2
 ; RV32-NEXT: sltu a0, a2, a0
 ; RV32-NEXT: add a2, a3, a0
+; RV32-NEXT: add a2, a1, a2
 ; RV32-NEXT: beq a2, a1, .LBB55_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a0, a2, a1
@@ -3079,10 +3079,10 @@ define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
 ;
 ; RV32ZBA-LABEL: uaddo.br.i64:
 ; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a3, a1, a3
 ; RV32ZBA-NEXT: add a2, a0, a2
 ; RV32ZBA-NEXT: sltu a0, a2, a0
 ; RV32ZBA-NEXT: add a2, a3, a0
+; RV32ZBA-NEXT: add a2, a1, a2
 ; RV32ZBA-NEXT: beq a2, a1, .LBB55_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a0, a2, a1
@@ -3185,12 +3185,12 @@ define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: ssubo.br.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a0, a0, a2
-; RV32-NEXT: sub a2, a1, a3
-; RV32-NEXT: sub a2, a2, a0
-; RV32-NEXT: xor a2, a1, a2
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: xor a1, a1, a3
-; RV32-NEXT: and a1, a1, a2
-; RV32-NEXT: bgez a1, .LBB57_2
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: bgez a0, .LBB57_2
 ; RV32-NEXT: # %bb.1: # %overflow
 ; RV32-NEXT: li a0, 0
 ; RV32-NEXT: ret
@@ -3214,12 +3214,12 @@ define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-LABEL: ssubo.br.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a0, a0, a2
-; RV32ZBA-NEXT: sub a2, a1, a3
-; RV32ZBA-NEXT: sub a2, a2, a0
-; RV32ZBA-NEXT: xor a2, a1, a2
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: sub a0, a1, a0
+; RV32ZBA-NEXT: xor a0, a1, a0
 ; RV32ZBA-NEXT: xor a1, a1, a3
-; RV32ZBA-NEXT: and a1, a1, a2
-; RV32ZBA-NEXT: bgez a1, .LBB57_2
+; RV32ZBA-NEXT: and a0, a1, a0
+; RV32ZBA-NEXT: bgez a0, .LBB57_2
 ; RV32ZBA-NEXT: # %bb.1: # %overflow
 ; RV32ZBA-NEXT: li a0, 0
 ; RV32ZBA-NEXT: ret
@@ -3313,8 +3313,8 @@ define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-LABEL: usubo.br.i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: sltu a4, a0, a2
+; RV32-NEXT: add a3, a3, a4
 ; RV32-NEXT: sub a3, a1, a3
-; RV32-NEXT: sub a3, a3, a4
 ; RV32-NEXT: beq a3, a1, .LBB59_3
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a0, a1, a3
@@ -3344,8 +3344,8 @@ define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-LABEL: usubo.br.i64:
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: sltu a4, a0, a2
+; RV32ZBA-NEXT: add a3, a3, a4
 ; RV32ZBA-NEXT: sub a3, a1, a3
-; RV32ZBA-NEXT: sub a3, a3, a4
 ; RV32ZBA-NEXT: beq a3, a1, .LBB59_3
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a0, a1, a3
@@ -3478,21 +3478,21 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT: sltu a7, t0, a7
 ; RV32-NEXT: sltu a5, a6, a5
 ; RV32-NEXT: mulhu a6, a1, a3
-; RV32-NEXT: add a5, a6, a5
 ; RV32-NEXT: add a5, a5, a7
 ; RV32-NEXT: mulhu a2, a2, t1
-; RV32-NEXT: add a2, a2, t2
 ; RV32-NEXT: mul a3, a3, t1
+; RV32-NEXT: add a3, t2, a3
 ; RV32-NEXT: add a2, a2, a3
 ; RV32-NEXT: mul a1, t3, a1
 ; RV32-NEXT: mulhu a0, t3, a0
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: add a0, a0, t4
-; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: sltu a1, t5, t4
+; RV32-NEXT: add a1, a1, t4
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: sltu a2, t5, t4
+; RV32-NEXT: add a1, a1, a2
 ; RV32-NEXT: add a0, a0, a1
 ; RV32-NEXT: add a0, a5, a0
 ; RV32-NEXT: add a0, a0, s0
+; RV32-NEXT: add a0, a6, a0
 ; RV32-NEXT: srai a4, a4, 31
 ; RV32-NEXT: xor a0, a0, a4
 ; RV32-NEXT: xor a1, t6, a4
@@ -3551,21 +3551,21 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT: sltu a7, t0, a7
 ; RV32ZBA-NEXT: sltu a5, a6, a5
 ; RV32ZBA-NEXT: mulhu a6, a1, a3
-; RV32ZBA-NEXT: add a5, a6, a5
 ; RV32ZBA-NEXT: add a5, a5, a7
 ; RV32ZBA-NEXT: mulhu a2, a2, t1
-; RV32ZBA-NEXT: add a2, a2, t2
 ; RV32ZBA-NEXT: mul a3, a3, t1
+; RV32ZBA-NEXT: add a3, t2, a3
 ; RV32ZBA-NEXT: add a2, a2, a3
 ; RV32ZBA-NEXT: mul a1, t3, a1
 ; RV32ZBA-NEXT: mulhu a0, t3, a0
-; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: add a0, a0, t4
-; RV32ZBA-NEXT: add a0, a0, a2
-; RV32ZBA-NEXT: sltu a1, t5, t4
+; RV32ZBA-NEXT: add a1, a1, t4
+; RV32ZBA-NEXT: add a1, a1, a2
+; RV32ZBA-NEXT: sltu a2, t5, t4
+; RV32ZBA-NEXT: add a1, a1, a2
 ; RV32ZBA-NEXT: add a0, a0, a1
 ; RV32ZBA-NEXT: add a0, a5, a0
 ; RV32ZBA-NEXT: add a0, a0, s0
+; RV32ZBA-NEXT: add a0, a6, a0
 ; RV32ZBA-NEXT: srai a4, a4, 31
 ; RV32ZBA-NEXT: xor a0, a0, a4
 ; RV32ZBA-NEXT: xor a1, t6, a4
@@ -3633,16 +3633,16 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) {
 ; RV32-NEXT: sltu t1, t1, t6
 ; RV32-NEXT: sltu a4, a6, a4
 ; RV32-NEXT: mulhu a6, a1, a7
-; RV32-NEXT: add a4, a6, a4
 ; RV32-NEXT: add a4, a4, t1
 ; RV32-NEXT: sltu a5, t3, a5
 ; RV32-NEXT: mulh a2, t2, a2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sub a0, a0, a2
+; RV32-NEXT: sub a0, a0, a5
 ; RV32-NEXT: sub a0, t0, a0
-; RV32-NEXT: sub a0, a0, a1
-; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: add a0, a0, a5
 ; RV32-NEXT: add a0, a4, a0
 ; RV32-NEXT: add a0, a0, t5
+; RV32-NEXT: add a0, a6, a0
 ; RV32-NEXT: srai a3, a3, 31
 ; RV32-NEXT: xor a0, a0, a3
 ; RV32-NEXT: xor a1, t4, a3
@@ -3695,16 +3695,16 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) {
 ; RV32ZBA-NEXT: sltu t1, t1, t6
 ; RV32ZBA-NEXT: sltu a4, a6, a4
 ; RV32ZBA-NEXT: mulhu a6, a1, a7
-; RV32ZBA-NEXT: add a4, a6, a4
 ; RV32ZBA-NEXT: add a4, a4, t1
 ; RV32ZBA-NEXT: sltu a5, t3, a5
 ; RV32ZBA-NEXT: mulh a2, t2, a2
+; RV32ZBA-NEXT: add a0, a0, a1
+; RV32ZBA-NEXT: sub a0, a0, a2
+; RV32ZBA-NEXT: sub a0, a0, a5
 ; RV32ZBA-NEXT: sub a0, t0, a0
-; RV32ZBA-NEXT: sub a0, a0, a1
-; RV32ZBA-NEXT: add a0, a0, a2
-; RV32ZBA-NEXT: add a0, a0, a5
 ; RV32ZBA-NEXT: add a0, a4, a0
 ; RV32ZBA-NEXT: add a0, a0, t5
+; RV32ZBA-NEXT: add a0, a6, a0
 ; RV32ZBA-NEXT: srai a3, a3, 31
 ; RV32ZBA-NEXT: xor a0, a0, a3
 ; RV32ZBA-NEXT: xor a1, t4, a3
@@ -3811,20 +3811,20 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: mul a4, a3, a0
 ; RV32-NEXT: mul a5, a1, a2
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: mulhu a5, a0, a2
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: sltu a4, a4, a5
+; RV32-NEXT: mulhu a6, a0, a2
+; RV32-NEXT: add a4, a6, a4
+; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: sltu a4, a4, a6
 ; RV32-NEXT: snez a5, a3
 ; RV32-NEXT: snez a6, a1
 ; RV32-NEXT: and a5, a6, a5
 ; RV32-NEXT: mulhu a1, a1, a2
 ; RV32-NEXT: snez a1, a1
-; RV32-NEXT: or a1, a5, a1
 ; RV32-NEXT: mulhu a0, a3, a0
 ; RV32-NEXT: snez a0, a0
 ; RV32-NEXT: or a0, a1, a0
 ; RV32-NEXT: or a0, a0, a4
+; RV32-NEXT: or a0, a5, a0
 ; RV32-NEXT: beqz a0, .LBB64_2
 ; RV32-NEXT: # %bb.1: # %overflow
 ; RV32-NEXT: li a0, 0
@@ -3848,20 +3848,20 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: mul a4, a3, a0
 ; RV32ZBA-NEXT: mul a5, a1, a2
-; RV32ZBA-NEXT: add a4, a5, a4
-; RV32ZBA-NEXT: mulhu a5, a0, a2
-; RV32ZBA-NEXT: add a4, a5, a4
-; RV32ZBA-NEXT: sltu a4, a4, a5
+; RV32ZBA-NEXT: mulhu a6, a0, a2
+; RV32ZBA-NEXT: add a4, a6, a4
+; RV32ZBA-NEXT: add a4, a4, a5
+; RV32ZBA-NEXT: sltu a4, a4, a6
 ; RV32ZBA-NEXT: snez a5, a3
 ; RV32ZBA-NEXT: snez a6, a1
 ; RV32ZBA-NEXT: and a5, a6, a5
 ; RV32ZBA-NEXT: mulhu a1, a1, a2
 ; RV32ZBA-NEXT: snez a1, a1
-; RV32ZBA-NEXT: or a1, a5, a1
 ; RV32ZBA-NEXT: mulhu a0, a3, a0
 ; RV32ZBA-NEXT: snez a0, a0
 ; RV32ZBA-NEXT: or a0, a1, a0
 ; RV32ZBA-NEXT: or a0, a0, a4
+; RV32ZBA-NEXT: or a0, a5, a0
 ; RV32ZBA-NEXT: beqz a0, .LBB64_2
 ; RV32ZBA-NEXT: # %bb.1: # %overflow
 ; RV32ZBA-NEXT: li a0, 0
@@ -3898,8 +3898,8 @@ define zeroext i1 @umulo2.br.i64(i64 %v1) {
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: add a2, a0, a0
 ; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a2, a1, a1
-; RV32-NEXT: add a2, a2, a0
+; RV32-NEXT: add a2, a1, a0
+; RV32-NEXT: add a2, a1, a2
 ; RV32-NEXT: beq a2, a1, .LBB65_2
 ; RV32-NEXT: # %bb.1: # %entry
 ; RV32-NEXT: sltu a0, a2, a1
@@ -3927,8 +3927,8 @@ define zeroext i1 @umulo2.br.i64(i64 %v1) {
 ; RV32ZBA: # %bb.0: # %entry
 ; RV32ZBA-NEXT: add a2, a0, a0
 ; RV32ZBA-NEXT: sltu a0, a2, a0
-; RV32ZBA-NEXT: add a2, a1, a1
-; RV32ZBA-NEXT: add a2, a2, a0
+; RV32ZBA-NEXT: add a2, a1, a0
+; RV32ZBA-NEXT: add a2, a1, a2
 ; RV32ZBA-NEXT: beq a2, a1, .LBB65_2
 ; RV32ZBA-NEXT: # %bb.1: # %entry
 ; RV32ZBA-NEXT: sltu a0, a2, a1
-- 
2.7.4