From 7e4ab9d5b8b855602c602fe0908afcf078ce3f20 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 2 Jul 2022 23:11:14 -0700
Subject: [PATCH] [RISCV] Add more SHXADD isel patterns.

This handles the code we get for

int foo(int* x, unsigned y) {
  return x[y >> 1];
}

The shift right and the shl get DAG combined into
(shl (and X, 0xfffffffe), 1). We have custom isel to match the shl+and,
but with Zba the (add (shl X, 1), Y) part will get matched first and
leave the and to be selected by itself. This commit adds larger
patterns that include the and.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 12 ++++++++++++
 llvm/test/CodeGen/RISCV/rv64zba.ll        | 21 ++++++---------------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 0980d90..9532d1d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -1190,6 +1190,18 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
           (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
 def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
           (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
+
+// Use SRLIW to shift out the LSBs and zero the upper 32 bits. Use SHXADD with
+// a larger shift amount to shift zeros back into the LSBs and do the add.
+def : Pat<(i64 (add (shl (and_oneuse GPR:$rs1, 0xFFFFFFFE), (i64 1)),
+                    non_imm12:$rs2)),
+          (SH2ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (shl (and_oneuse GPR:$rs1, 0xFFFFFFFE), (i64 2)),
+                    non_imm12:$rs2)),
+          (SH3ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (shl (and_oneuse GPR:$rs1, 0xFFFFFFFC), (i64 1)),
+                    non_imm12:$rs2)),
+          (SH3ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
 let Predicates = [HasStdExtZbcOrZbkc] in {
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index c6af0b7..06d256e 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1310,7 +1310,6 @@ define i64 @srliw_3_sh3add(i64* %0, i32 signext %1) {
   ret i64 %6
 }
 
-; FIXME: This should select sh2add with Zba.
 define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
 ; RV64I-LABEL: srliw_1_sh2add:
 ; RV64I:       # %bb.0:
@@ -1322,10 +1321,8 @@ define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
 ;
 ; RV64ZBA-LABEL: srliw_1_sh2add:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    li a2, -2
-; RV64ZBA-NEXT:    zext.w a2, a2
-; RV64ZBA-NEXT:    and a1, a1, a2
-; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
 ; RV64ZBA-NEXT:    lw a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %3 = lshr i32 %1, 1
@@ -1335,7 +1332,6 @@ define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
   ret i32 %6
 }
 
-; FIXME: This should select sh3add with Zba.
 define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
 ; RV64I-LABEL: srliw_1_sh3add:
 ; RV64I:       # %bb.0:
@@ -1347,10 +1343,8 @@ define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
 ;
 ; RV64ZBA-LABEL: srliw_1_sh3add:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    li a2, -2
-; RV64ZBA-NEXT:    zext.w a2, a2
-; RV64ZBA-NEXT:    and a1, a1, a2
-; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 1
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ld a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %3 = lshr i32 %1, 1
@@ -1360,7 +1354,6 @@ define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
   ret i64 %6
 }
 
-; FIXME: This should select sh3add with Zba.
 define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
 ; RV64I-LABEL: srliw_2_sh3add:
 ; RV64I:       # %bb.0:
@@ -1372,10 +1365,8 @@ define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
 ;
 ; RV64ZBA-LABEL: srliw_2_sh3add:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    li a2, -4
-; RV64ZBA-NEXT:    zext.w a2, a2
-; RV64ZBA-NEXT:    and a1, a1, a2
-; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ld a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
   %3 = lshr i32 %1, 2
-- 
2.7.4
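The new patterns are sound because masking the low bits of the 32-bit index and
then shifting left computes the same value as shifting the low 32 bits right
(srliw) and scaling by one extra bit in the SHXADD. Below is a minimal,
host-side C sketch (not part of the patch) that checks the three identities the
patterns rely on; the values and shift amounts come straight from the patterns
above, everything else is illustrative.

// Checks the shift/mask identities behind the new SHXADD patterns. After a
// srliw by 1 or 2, bit 31 of the 32-bit result is zero, so the sign-extension
// performed by srliw matches the zero-extension modeled here.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t tests[] = {0, 1, 2, 3, 0x7fffffffu, 0x80000000u, 0xfffffffeu,
                      0xffffffffu, 0x123456789abcdef0ull, UINT64_MAX};
  for (size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); ++i) {
    uint64_t x = tests[i];
    uint64_t srliw1 = (uint64_t)((uint32_t)x >> 1); /* srliw x, 1 */
    uint64_t srliw2 = (uint64_t)((uint32_t)x >> 2); /* srliw x, 2 */
    /* (shl (and x, 0xFFFFFFFE), 1) + y  ==  sh2add (srliw x, 1), y */
    assert(((x & 0xFFFFFFFEull) << 1) == (srliw1 << 2));
    /* (shl (and x, 0xFFFFFFFE), 2) + y  ==  sh3add (srliw x, 1), y */
    assert(((x & 0xFFFFFFFEull) << 2) == (srliw1 << 3));
    /* (shl (and x, 0xFFFFFFFC), 1) + y  ==  sh3add (srliw x, 2), y */
    assert(((x & 0xFFFFFFFCull) << 1) == (srliw2 << 3));
  }
  printf("shift/mask identities hold\n");
  return 0;
}

For the motivating foo() example, the first identity is exactly why
"srliw a1, a1, 1; sh2add a0, a1, a0" in the updated srliw_1_sh2add test
addresses the same element as the old li/zext.w/and/sh1add sequence.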