From 90ecb862a003d581136842dcdc213315727d50e2 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 16 Mar 2021 11:53:43 +0000 Subject: [PATCH] [AArch64] Rewrite (add, csel) to cinc Don't rewrite an add instruction with 2 SET_CC operands into a csel instruction. The total instruction sequence uses an extra instruction and register. Preventing this allows us to match a `(add, csel)` pattern and rewrite this into a `cinc`. Differential Revision: https://reviews.llvm.org/D98704 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 7 +++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 +++ llvm/test/CodeGen/AArch64/arm64-csel.ll | 41 +++++++++++++++++++++++++ llvm/test/CodeGen/AArch64/half.ll | 12 ++++---- 4 files changed, 59 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3c823f5..e3c928e1b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13190,6 +13190,13 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { SDValue RHS = Op->getOperand(1); SetCCInfoAndKind InfoAndKind; + // If both operands are a SET_CC, then we don't want to perform this + // folding and create another csel as this results in more instructions + // (and higher register usage). + if (isSetCCOrZExtSetCC(LHS, InfoAndKind) && + isSetCCOrZExtSetCC(RHS, InfoAndKind)) + return SDValue(); + // If neither operand is a SET_CC, give up. 
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) { std::swap(LHS, RHS); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d5dd0ae..338963f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2162,6 +2162,11 @@ def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), + (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; +def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), + (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; + // The inverse of the condition code from the alias instruction is what is used // in the aliased instruction. The parser all ready inverts the condition code // for these aliases. diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll index f031710..44e951e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-csel.ll +++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll @@ -269,3 +269,44 @@ define i64 @foo23(i64 %x) { %res = select i1 %cmp, i64 1, i64 6 ret i64 %res } + +define i16 @foo24(i8* nocapture readonly %A, i8* nocapture readonly %B) { +; CHECK-LABEL: foo24: +; CHECK: ldrb w[[W8:[0-9]+]], [x1] +; CHECK-NEXT: ldrb w[[W9:[0-9]+]], [x0] +; CHECK-NEXT: cmp w[[W8]], #33 +; CHECK-NEXT: cset w[[W8]], hi +; CHECK-NEXT: cmp w[[W9]], #3 +; CHECK-NEXT: cinc w0, w[[W8]], hi +; CHECK-NEXT: ret +entry: + %0 = load i8, i8* %A, align 1 + %cmp = icmp ugt i8 %0, 3 + %conv1 = zext i1 %cmp to i16 + %1 = load i8, i8* %B, align 1 + %cmp4 = icmp ugt i8 %1, 33 + %conv5 = zext i1 %cmp4 to i16 + %add = add nuw nsw i16 %conv5, %conv1 + ret i16 %add +} + +define i64 @foo25(i64* nocapture readonly %A, i64* nocapture readonly %B) { +; CHECK-LABEL: foo25: +; CHECK: ldr 
x[[X8:[0-9]+]], [x1] +; CHECK-NEXT: ldr x[[X9:[0-9]+]], [x0] +; CHECK-NEXT: cmp x[[X8]], #33 +; CHECK-NEXT: cset w[[X8]], hi +; CHECK-NEXT: cmp x[[X9]], #3 +; CHECK-NEXT: cinc x0, x[[X8]], hi +; CHECK-NEXT: ret +entry: + %0 = load i64, i64* %A, align 1 + %cmp = icmp ugt i64 %0, 3 + %conv1 = zext i1 %cmp to i64 + %1 = load i64, i64* %B, align 1 + %cmp4 = icmp ugt i64 %1, 33 + %conv5 = zext i1 %cmp4 to i64 + %add = add nuw nsw i64 %conv5, %conv1 + ret i64 %add +} + diff --git a/llvm/test/CodeGen/AArch64/half.ll b/llvm/test/CodeGen/AArch64/half.ll index b815c53..ab64cc0 100644 --- a/llvm/test/CodeGen/AArch64/half.ll +++ b/llvm/test/CodeGen/AArch64/half.ll @@ -107,12 +107,12 @@ define i16 @test_fccmp(i1 %a, i16 %in) { ; CHECK-NEXT: movk w9, #15428, lsl #16 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: cset w8, pl -; CHECK-NEXT: fccmp s0, s1, #8, pl -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: csinc w9, w9, wzr, mi -; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: mov w10, #4 +; CHECK-NEXT: fccmp s0, s2, #8, pl +; CHECK-NEXT: csinc w8, w10, wzr, mi +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cinc w0, w8, pl ; CHECK-NEXT: ret %f16 = bitcast i16 %in to half %cmp0 = fcmp ogt half 0xH3333, %f16 -- 2.7.4