From 79d6e9c7130c1b55da188413e9f59e8605fe0902 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 28 Dec 2022 19:43:18 -0800 Subject: [PATCH] [RISCV] Prefer ADDI over ORI if the known bits are disjoint. There is no compressed form of ORI but there is a compressed form for ADDI. This also works for XORI since DAGCombine will turn Xor with disjoint bits into Or. Note: The compressed forms require a simm6 immediate, but I'm doing this for the full simm12 range. Reviewed By: kito-cheng Differential Revision: https://reviews.llvm.org/D140674 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 1 - llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 9 +++ llvm/test/CodeGen/RISCV/or-is-add.ll | 109 ++++++++++++++++++++++++++++ llvm/test/CodeGen/RISCV/rv64zba.ll | 2 +- llvm/test/CodeGen/RISCV/vararg.ll | 8 +- 6 files changed, 124 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/or-is-add.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 3898dbb..de8672b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 350e90a..17205b8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -16,6 +16,7 @@ #include "RISCV.h" #include "RISCVTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/KnownBits.h" // RISCV-specific code to select RISCV machine instructions for // SelectionDAG operations. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index e7940bc..9885a3e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1239,6 +1239,15 @@ def : PatGprUimmLog2XLen; def : PatGprUimmLog2XLen; def : PatGprUimmLog2XLen; +// Select 'or' as ADDI if the immediate bits are known to be 0 in $rs1. This +// can improve compressibility. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ + KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); + KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); + return KnownBits::haveNoCommonBitsSet(Known0, Known1); +}]>; +def : PatGprSimm12<or_is_add, ADDI>; + // negate of low bit can be done via two (compressible) shifts. The negate // is never compressible since rs1 and rd can't be the same register. def : Pat<(XLenVT (sub 0, (and_oneuse GPR:$rs, 1))), diff --git a/llvm/test/CodeGen/RISCV/or-is-add.ll b/llvm/test/CodeGen/RISCV/or-is-add.ll new file mode 100644 index 0000000..853dd20 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/or-is-add.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 + +define signext i32 @test1(i32 signext %x) { +; RV32-LABEL: test1: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: addi a0, a0, 1 +; RV32-NEXT: ret +; +; RV64-LABEL: test1: +; RV64: # %bb.0: +; RV64-NEXT: slliw a0, a0, 1 +; RV64-NEXT: addi a0, a0, 1 +; RV64-NEXT: ret + %a = shl i32 %x, 1 + %b = or i32 %a, 1 + ret i32 %b +} + +define i64 @test2(i64 %x) { +; RV32-LABEL: test2: +; RV32: # %bb.0: +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a0, a0, 2 +; RV32-NEXT: ret +; +; RV64-LABEL: test2: +; RV64: # %bb.0: +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a0, a0, 2 +; RV64-NEXT: ret + %a = and i64 %x, -4 + %b = or 
i64 %a, 2 + ret i64 %b +} + +define signext i32 @test3(i32 signext %x) { +; RV32-LABEL: test3: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: addi a0, a0, 6 +; RV32-NEXT: ret +; +; RV64-LABEL: test3: +; RV64: # %bb.0: +; RV64-NEXT: slliw a0, a0, 3 +; RV64-NEXT: addi a0, a0, 6 +; RV64-NEXT: ret + %a = shl i32 %x, 3 + %b = add i32 %a, 6 + ret i32 %b +} + +define i64 @test4(i64 %x) { +; RV32-LABEL: test4: +; RV32: # %bb.0: +; RV32-NEXT: srli a2, a0, 28 +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: addi a0, a0, 13 +; RV32-NEXT: ret +; +; RV64-LABEL: test4: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: addi a0, a0, 13 +; RV64-NEXT: ret + %a = shl i64 %x, 4 + %b = add i64 %a, 13 + ret i64 %b +} + +define signext i32 @test5(i32 signext %x) { +; RV32-LABEL: test5: +; RV32: # %bb.0: +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, 256 +; RV32-NEXT: ret +; +; RV64-LABEL: test5: +; RV64: # %bb.0: +; RV64-NEXT: srliw a0, a0, 24 +; RV64-NEXT: addi a0, a0, 256 +; RV64-NEXT: ret + %a = lshr i32 %x, 24 + %b = xor i32 %a, 256 + ret i32 %b +} + +define i64 @test6(i64 %x) { +; RV32-LABEL: test6: +; RV32: # %bb.0: +; RV32-NEXT: srli a1, a1, 22 +; RV32-NEXT: addi a0, a1, 1024 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: test6: +; RV64: # %bb.0: +; RV64-NEXT: srli a0, a0, 54 +; RV64-NEXT: addi a0, a0, 1024 +; RV64-NEXT: ret + %a = lshr i64 %x, 54 + %b = xor i64 %a, 1024 + ret i64 %b +} diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 104dfda..02e5a4b 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -679,7 +679,7 @@ define i64 @sh3add_imm(i64 %0) { ; CHECK-LABEL: sh3add_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: ori a0, a0, 7 +; CHECK-NEXT: addi a0, a0, 7 ; CHECK-NEXT: ret %a = shl i64 %0, 3 %b = add i64 %a, 7 diff --git a/llvm/test/CodeGen/RISCV/vararg.ll 
b/llvm/test/CodeGen/RISCV/vararg.ll index 5d7e2048..df8a670 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -627,7 +627,7 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27 ; ILP32-ILP32F-FPELIM-NEXT: andi a1, a0, -8 -; ILP32-ILP32F-FPELIM-NEXT: ori a0, a1, 4 +; ILP32-ILP32F-FPELIM-NEXT: addi a0, a1, 4 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a1) ; ILP32-ILP32F-FPELIM-NEXT: addi a2, a1, 8 @@ -651,7 +651,7 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a1, a0, -8 -; ILP32-ILP32F-WITHFP-NEXT: ori a0, a1, 4 +; ILP32-ILP32F-WITHFP-NEXT: addi a0, a1, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a1) ; ILP32-ILP32F-WITHFP-NEXT: addi a2, a1, 8 @@ -943,7 +943,7 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19 ; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8 -; ILP32-ILP32F-FPELIM-NEXT: ori a3, a0, 4 +; ILP32-ILP32F-FPELIM-NEXT: addi a3, a0, 4 ; ILP32-ILP32F-FPELIM-NEXT: sw a3, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: lw a3, 0(a0) ; ILP32-ILP32F-FPELIM-NEXT: addi a4, a0, 8 @@ -969,7 +969,7 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0) ; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11 ; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8 -; ILP32-ILP32F-WITHFP-NEXT: ori a3, a0, 4 +; ILP32-ILP32F-WITHFP-NEXT: addi a3, a0, 4 ; ILP32-ILP32F-WITHFP-NEXT: sw a3, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lw a3, 0(a0) ; ILP32-ILP32F-WITHFP-NEXT: addi a4, a0, 8 -- 2.7.4