From 7e4ab9d5b8b855602c602fe0908afcf078ce3f20 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 2 Jul 2022 23:11:14 -0700
Subject: [PATCH] [RISCV] Add more SHXADD isel patterns.

This handles the code we get for

int foo(int* x, unsigned y) {
  return x[y >> 1];
}

The shift right and the shl get DAG combined into
(shl (and X, 0xfffffffe), 1). We have custom isel to match the shl+and,
but with Zba the (add (shl X, 1), Y) part will get matched first and
leave the and to be selected by itself. This commit adds larger
patterns that include the and.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 12 ++++++++++++
 llvm/test/CodeGen/RISCV/rv64zba.ll        | 21 ++++++---------------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 0980d90..9532d1d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -1190,6 +1190,18 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
           (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
 def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
           (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
+
+// Use SRLIW to shift out the LSBs and zero the upper 32 bits. Use SHXADD with
+// a larger shift amount to shift zeros back into the LSBs and do the add.
+def : Pat<(i64 (add (shl (and_oneuse GPR:$rs1, 0xFFFFFFFE), (i64 1)),
+                    non_imm12:$rs2)),
+          (SH2ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (shl (and_oneuse GPR:$rs1, 0xFFFFFFFE), (i64 2)),
+                    non_imm12:$rs2)),
+          (SH3ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (shl (and_oneuse GPR:$rs1, 0xFFFFFFFC), (i64 1)),
+                    non_imm12:$rs2)),
+          (SH3ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
 let Predicates = [HasStdExtZbcOrZbkc] in {
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index c6af0b7..06d256e 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1310,7 +1310,6 @@ define i64 @srliw_3_sh3add(i64* %0, i32 signext %1) {
   ret i64 %6
 }
 
-; FIXME: This should select sh2add with Zba.
 define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
 ; RV64I-LABEL: srliw_1_sh2add:
 ; RV64I:       # %bb.0:
@@ -1322,10 +1321,8 @@ define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
 ;
 ; RV64ZBA-LABEL: srliw_1_sh2add:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    li a2, -2
-; RV64ZBA-NEXT:    zext.w a2, a2
-; RV64ZBA-NEXT:    and a1, a1, a2
-; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
 ; RV64ZBA-NEXT:    lw a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %3 = lshr i32 %1, 1
@@ -1335,7 +1332,6 @@ define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
   ret i32 %6
 }
 
-; FIXME: This should select sh3add with Zba.
 define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
 ; RV64I-LABEL: srliw_1_sh3add:
 ; RV64I:       # %bb.0:
@@ -1347,10 +1343,8 @@ define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
 ;
 ; RV64ZBA-LABEL: srliw_1_sh3add:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    li a2, -2
-; RV64ZBA-NEXT:    zext.w a2, a2
-; RV64ZBA-NEXT:    and a1, a1, a2
-; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ld a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %3 = lshr i32 %1, 1
@@ -1360,7 +1354,6 @@ define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
   ret i64 %6
 }
 
-; FIXME: This should select sh3add with Zba.
 define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
 ; RV64I-LABEL: srliw_2_sh3add:
 ; RV64I:       # %bb.0:
@@ -1372,10 +1365,8 @@ define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
 ;
 ; RV64ZBA-LABEL: srliw_2_sh3add:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    li a2, -4
-; RV64ZBA-NEXT:    zext.w a2, a2
-; RV64ZBA-NEXT:    and a1, a1, a2
-; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ld a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %3 = lshr i32 %1, 2
-- 
2.7.4
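The new patterns are sound because masking the low bits of the 32-bit index and
then shifting left computes the same value as shifting the low 32 bits right
(srliw) and scaling by one extra bit in the SHXADD. Below is a minimal,
host-side C sketch (not part of the patch) that checks the three identities the
patterns rely on; the values and shift amounts come straight from the patterns
above, everything else is illustrative.

// Checks the shift/mask identities behind the new SHXADD patterns. After a
// srliw by 1 or 2, bit 31 of the 32-bit result is zero, so the sign-extension
// performed by srliw matches the zero-extension modeled here.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t tests[] = {0, 1, 2, 3, 0x7fffffffu, 0x80000000u, 0xfffffffeu,
                      0xffffffffu, 0x123456789abcdef0ull, UINT64_MAX};
  for (size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); ++i) {
    uint64_t x = tests[i];
    uint64_t srliw1 = (uint64_t)((uint32_t)x >> 1); /* srliw x, 1 */
    uint64_t srliw2 = (uint64_t)((uint32_t)x >> 2); /* srliw x, 2 */
    /* (shl (and x, 0xFFFFFFFE), 1) + y  ==  sh2add (srliw x, 1), y */
    assert(((x & 0xFFFFFFFEull) << 1) == (srliw1 << 2));
    /* (shl (and x, 0xFFFFFFFE), 2) + y  ==  sh3add (srliw x, 1), y */
    assert(((x & 0xFFFFFFFEull) << 2) == (srliw1 << 3));
    /* (shl (and x, 0xFFFFFFFC), 1) + y  ==  sh3add (srliw x, 2), y */
    assert(((x & 0xFFFFFFFCull) << 1) == (srliw2 << 3));
  }
  printf("shift/mask identities hold\n");
  return 0;
}

For the motivating foo() example, the first identity is exactly why
"srliw a1, a1, 1; sh2add a0, a1, a0" in the updated srliw_1_sh2add test
addresses the same element as the old li/zext.w/and/sh1add sequence.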