From b42ff9fb038206c7967e22ceef2c7ea8275dc198 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Mon, 18 Jan 2021 17:30:19 +0000 Subject: [PATCH] [RISCV][NFC] Increase test coverage of Zbt extension Add Zbt (ternary) extension code generation to the select lowering tests since it can have a significant impact on how select is lowered. While we are here make the neg-abs commands more consistent with the other tests. Reviewed By: lenary Differential Revision: https://reviews.llvm.org/D94798 --- llvm/test/CodeGen/RISCV/neg-abs.ll | 186 +++++++++----- llvm/test/CodeGen/RISCV/select-and.ll | 58 +++++ .../RISCV/{bare-select.ll => select-bare.ll} | 22 ++ llvm/test/CodeGen/RISCV/select-cc.ll | 56 +++++ llvm/test/CodeGen/RISCV/select-const.ll | 156 ++++++++++++ .../test/CodeGen/RISCV/select-optimize-multiple.ll | 272 +++++++++++++++++++++ .../CodeGen/RISCV/select-optimize-multiple.mir | 90 +++++++ llvm/test/CodeGen/RISCV/select-or.ll | 58 +++++ 8 files changed, 840 insertions(+), 58 deletions(-) rename llvm/test/CodeGen/RISCV/{bare-select.ll => select-bare.ll} (57%) diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index b5e5d2a..f2e7579 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -1,43 +1,77 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=riscv32-unknown-unknown | FileCheck %s --check-prefix=RV32 -; RUN: llc < %s -verify-machineinstrs -mtriple=riscv64-unknown-unknown | FileCheck %s --check-prefix=RV64 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV32IBT +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV64IBT declare i32 @llvm.abs.i32(i32, i1 immarg) declare i64 @llvm.abs.i64(i64, i1 immarg) define i32 @neg_abs32(i32 %x) { -; RV32-LABEL: neg_abs32: -; RV32: # %bb.0: -; RV32-NEXT: srai a1, a0, 31 -; RV32-NEXT: xor a0, a0, a1 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: ret +; RV32I-LABEL: neg_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ret ; -; RV64-LABEL: neg_abs32: -; RV64: # %bb.0: -; RV64-NEXT: sraiw a1, a0, 31 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: subw a0, a1, a0 -; RV64-NEXT: ret +; RV32IBT-LABEL: neg_abs32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: srai a1, a0, 31 +; RV32IBT-NEXT: xor a0, a0, a1 +; RV32IBT-NEXT: sub a0, a1, a0 +; RV32IBT-NEXT: ret +; +; RV64I-LABEL: neg_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: sraiw a1, a0, 31 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: subw a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IBT-LABEL: neg_abs32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: sraiw a1, a0, 31 +; RV64IBT-NEXT: xor a0, a0, a1 +; RV64IBT-NEXT: subw a0, a1, a0 +; RV64IBT-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) %neg = sub nsw i32 0, %abs ret i32 %neg } define i32 @select_neg_abs32(i32 %x) { -; RV32-LABEL: select_neg_abs32: -; RV32: # %bb.0: -; RV32-NEXT: srai a1, a0, 31 -; RV32-NEXT: xor a0, a0, a1 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: ret +; RV32I-LABEL: select_neg_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IBT-LABEL: select_neg_abs32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: srai a1, a0, 31 +; RV32IBT-NEXT: xor a0, a0, a1 +; RV32IBT-NEXT: sub a0, a1, a0 +; RV32IBT-NEXT: ret +; +; RV64I-LABEL: select_neg_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: sraiw a1, a0, 31 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: subw a0, a1, a0 +; RV64I-NEXT: ret ; -; RV64-LABEL: select_neg_abs32: -; RV64: # %bb.0: -; RV64-NEXT: sraiw a1, a0, 31 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: subw a0, a1, a0 -; RV64-NEXT: ret +; RV64IBT-LABEL: select_neg_abs32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: sraiw a1, a0, 31 +; RV64IBT-NEXT: xor a0, a0, a1 +; RV64IBT-NEXT: subw a0, a1, a0 +; RV64IBT-NEXT: ret %1 = icmp slt i32 %x, 0 %2 = sub nsw i32 0, %x %3 = select i1 %1, i32 %x, i32 %2 @@ -45,46 +79,82 @@ define i32 @select_neg_abs32(i32 %x) { } define i64 @neg_abs64(i64 %x) { -; RV32-LABEL: neg_abs64: -; RV32: # %bb.0: -; RV32-NEXT: srai a2, a1, 31 -; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: sltu a3, a2, a0 -; RV32-NEXT: xor a1, a1, a2 -; RV32-NEXT: sub a1, a2, a1 -; RV32-NEXT: sub a1, a1, a3 -; RV32-NEXT: sub a0, a2, a0 -; RV32-NEXT: ret +; RV32I-LABEL: neg_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: srai a2, a1, 31 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: sltu a3, a2, a0 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: ret ; -; RV64-LABEL: neg_abs64: -; RV64: # %bb.0: -; RV64-NEXT: srai a1, a0, 63 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: ret +; RV32IBT-LABEL: neg_abs64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: srai a2, a1, 31 +; RV32IBT-NEXT: xor a0, a0, a2 +; RV32IBT-NEXT: sltu a3, a2, a0 +; RV32IBT-NEXT: xor a1, a1, a2 +; RV32IBT-NEXT: sub a1, a2, a1 +; RV32IBT-NEXT: sub a1, a1, a3 +; RV32IBT-NEXT: sub a0, a2, a0 +; RV32IBT-NEXT: ret +; +; RV64I-LABEL: neg_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IBT-LABEL: neg_abs64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: srai a1, a0, 63 +; RV64IBT-NEXT: xor a0, a0, a1 +; RV64IBT-NEXT: sub a0, a1, a0 +; RV64IBT-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) %neg = sub nsw i64 0, %abs ret i64 %neg } define i64 @select_neg_abs64(i64 %x) { -; RV32-LABEL: select_neg_abs64: -; RV32: # %bb.0: -; RV32-NEXT: srai a2, a1, 31 -; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: sltu a3, a2, a0 -; RV32-NEXT: xor a1, a1, a2 -; RV32-NEXT: sub a1, a2, a1 -; RV32-NEXT: sub a1, a1, a3 -; RV32-NEXT: sub a0, a2, a0 -; RV32-NEXT: ret +; RV32I-LABEL: select_neg_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: srai a2, a1, 31 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: sltu a3, a2, a0 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: ret +; +; RV32IBT-LABEL: select_neg_abs64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: srai a2, a1, 31 +; RV32IBT-NEXT: xor a0, a0, a2 +; RV32IBT-NEXT: sltu a3, a2, a0 +; RV32IBT-NEXT: xor a1, a1, a2 +; RV32IBT-NEXT: sub a1, a2, a1 +; RV32IBT-NEXT: sub a1, a1, a3 +; RV32IBT-NEXT: sub a0, a2, a0 +; RV32IBT-NEXT: ret +; +; RV64I-LABEL: select_neg_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: ret ; -; RV64-LABEL: select_neg_abs64: -; RV64: # %bb.0: -; RV64-NEXT: srai a1, a0, 63 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: ret +; RV64IBT-LABEL: select_neg_abs64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: srai a1, a0, 63 +; RV64IBT-NEXT: xor a0, a0, a1 +; RV64IBT-NEXT: sub a0, a1, a0 +; RV64IBT-NEXT: ret %1 = icmp slt i64 %x, 0 %2 = sub nsw i64 0, %x %3 = select i1 %1, i64 %x, i64 %2 diff --git a/llvm/test/CodeGen/RISCV/select-and.ll b/llvm/test/CodeGen/RISCV/select-and.ll index c7554d5..5160a6c 100644 --- a/llvm/test/CodeGen/RISCV/select-and.ll +++ b/llvm/test/CodeGen/RISCV/select-and.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IBT %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IBT %s ;; There are a few different ways to lower (select (and A, B), X, Y). This test ;; ensures that we do so with as few branches as possible. @@ -18,6 +22,16 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c, ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: select_of_and: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: and a1, a0, a1 +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: bnez a1, .LBB0_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: .LBB0_2: +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: select_of_and: ; RV64I: # %bb.0: ; RV64I-NEXT: and a1, a0, a1 @@ -27,6 +41,16 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c, ; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: select_of_and: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: and a1, a0, a1 +; RV64IBT-NEXT: mv a0, a2 +; RV64IBT-NEXT: bnez a1, .LBB0_2 +; RV64IBT-NEXT: # %bb.1: +; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: .LBB0_2: +; RV64IBT-NEXT: ret %1 = and i1 %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -53,6 +77,23 @@ define signext i32 @if_of_and(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: if_of_and: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: addi sp, sp, -16 +; RV32IBT-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IBT-NEXT: and a0, a0, a1 +; RV32IBT-NEXT: addi a1, zero, 1 +; RV32IBT-NEXT: bne a0, a1, .LBB1_2 +; RV32IBT-NEXT: # %bb.1: # %if.then +; RV32IBT-NEXT: call both@plt +; RV32IBT-NEXT: j .LBB1_3 +; RV32IBT-NEXT: .LBB1_2: # %if.else +; RV32IBT-NEXT: call neither@plt +; RV32IBT-NEXT: .LBB1_3: # %if.end +; RV32IBT-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IBT-NEXT: addi sp, sp, 16 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: if_of_and: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -69,6 +110,23 @@ define signext i32 @if_of_and(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: if_of_and: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: addi sp, sp, -16 +; RV64IBT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IBT-NEXT: and a0, a0, a1 +; RV64IBT-NEXT: addi a1, zero, 1 +; RV64IBT-NEXT: bne a0, a1, .LBB1_2 +; RV64IBT-NEXT: # %bb.1: # %if.then +; RV64IBT-NEXT: call both@plt +; RV64IBT-NEXT: j .LBB1_3 +; RV64IBT-NEXT: .LBB1_2: # %if.else +; RV64IBT-NEXT: call neither@plt +; RV64IBT-NEXT: .LBB1_3: # %if.end +; RV64IBT-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IBT-NEXT: addi sp, sp, 16 +; RV64IBT-NEXT: ret %1 = and i1 %a, %b br i1 %1, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/RISCV/bare-select.ll b/llvm/test/CodeGen/RISCV/select-bare.ll similarity index 57% rename from llvm/test/CodeGen/RISCV/bare-select.ll rename to llvm/test/CodeGen/RISCV/select-bare.ll index cf8fe96..f3feb68 100644 --- a/llvm/test/CodeGen/RISCV/bare-select.ll +++ b/llvm/test/CodeGen/RISCV/select-bare.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBT define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-LABEL: bare_select: @@ -12,6 +14,16 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: ret +; +; RV32IBT-LABEL: bare_select: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: andi a3, a0, 1 +; RV32IBT-NEXT: mv a0, a1 +; RV32IBT-NEXT: bnez a3, .LBB0_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_2: +; RV32IBT-NEXT: ret %1 = select i1 %a, i32 %b, i32 %c ret i32 %1 } @@ -26,6 +38,16 @@ define float @bare_select_float(i1 %a, float %b, float %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB1_2: ; RV32I-NEXT: ret +; +; RV32IBT-LABEL: bare_select_float: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: andi a3, a0, 1 +; RV32IBT-NEXT: mv a0, a1 +; RV32IBT-NEXT: bnez a3, .LBB1_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB1_2: +; RV32IBT-NEXT: ret %1 = select i1 %a, float %b, float %c ret float %1 } diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll index 7faad3b..405c88a 100644 --- a/llvm/test/CodeGen/RISCV/select-cc.ll +++ b/llvm/test/CodeGen/RISCV/select-cc.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -disable-block-placement -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -disable-block-placement -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IBT %s define i32 @foo(i32 %a, i32 *%b) nounwind { ; RV32I-LABEL: foo: @@ -56,6 +58,60 @@ define i32 @foo(i32 %a, i32 *%b) nounwind { ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB0_20: ; RV32I-NEXT: ret +; +; RV32IBT-LABEL: foo: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: beq a0, a2, .LBB0_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_2: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: bne a0, a2, .LBB0_4 +; RV32IBT-NEXT: # %bb.3: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_4: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: bltu a2, a0, .LBB0_6 +; RV32IBT-NEXT: # %bb.5: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_6: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: bgeu a0, a2, .LBB0_8 +; RV32IBT-NEXT: # %bb.7: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_8: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: bltu a0, a2, .LBB0_10 +; RV32IBT-NEXT: # %bb.9: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_10: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: bgeu a2, a0, .LBB0_12 +; RV32IBT-NEXT: # %bb.11: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_12: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: blt a2, a0, .LBB0_14 +; RV32IBT-NEXT: # %bb.13: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_14: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: bge a0, a2, .LBB0_16 +; RV32IBT-NEXT: # %bb.15: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_16: +; RV32IBT-NEXT: lw a2, 0(a1) +; RV32IBT-NEXT: blt a0, a2, .LBB0_18 +; RV32IBT-NEXT: # %bb.17: +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: .LBB0_18: +; RV32IBT-NEXT: lw a1, 0(a1) +; RV32IBT-NEXT: bge a1, a0, .LBB0_20 +; RV32IBT-NEXT: # %bb.19: +; RV32IBT-NEXT: mv a0, a1 +; RV32IBT-NEXT: .LBB0_20: +; RV32IBT-NEXT: ret %val1 = load volatile i32, i32* %b %tst1 = icmp eq i32 %a, %val1 %val2 = select i1 %tst1, i32 %a, i32 %val1 diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll index 40e6ee10..7c9b42f 100644 --- a/llvm/test/CodeGen/RISCV/select-const.ll +++ b/llvm/test/CodeGen/RISCV/select-const.ll @@ -3,10 +3,18 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IBT %s +; RUN: llc -mtriple=riscv32 -mattr=+f,+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IFBT %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IFD %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IBT %s +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IFDBT %s ;; This tests how good we are at materialising constants using `select`. The aim ;; is that we do so without a branch if possible (at the moment our lowering of @@ -25,6 +33,14 @@ define signext i32 @select_const_int_easy(i1 zeroext %a) nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: ret ; +; RV32IBT-LABEL: select_const_int_easy: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: ret +; +; RV32IFBT-LABEL: select_const_int_easy: +; RV32IFBT: # %bb.0: +; RV32IFBT-NEXT: ret +; ; RV64I-LABEL: select_const_int_easy: ; RV64I: # %bb.0: ; RV64I-NEXT: ret @@ -32,6 +48,14 @@ define signext i32 @select_const_int_easy(i1 zeroext %a) nounwind { ; RV64IFD-LABEL: select_const_int_easy: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: ret +; +; RV64IBT-LABEL: select_const_int_easy: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: ret +; +; RV64IFDBT-LABEL: select_const_int_easy: +; RV64IFDBT: # %bb.0: +; RV64IFDBT-NEXT: ret %1 = select i1 %a, i32 1, i32 0 ret i32 %1 } @@ -49,6 +73,18 @@ define signext i32 @select_const_int_one_away(i1 zeroext %a) nounwind { ; RV32IF-NEXT: sub a0, a1, a0 ; RV32IF-NEXT: ret ; +; RV32IBT-LABEL: select_const_int_one_away: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: addi a1, zero, 4 +; RV32IBT-NEXT: sub a0, a1, a0 +; RV32IBT-NEXT: ret +; +; RV32IFBT-LABEL: select_const_int_one_away: +; RV32IFBT: # %bb.0: +; RV32IFBT-NEXT: addi a1, zero, 4 +; RV32IFBT-NEXT: sub a0, a1, a0 +; RV32IFBT-NEXT: ret +; ; RV64I-LABEL: select_const_int_one_away: ; RV64I: # %bb.0: ; RV64I-NEXT: addi a1, zero, 4 @@ -60,6 +96,18 @@ define signext i32 @select_const_int_one_away(i1 zeroext %a) nounwind { ; RV64IFD-NEXT: addi a1, zero, 4 ; RV64IFD-NEXT: sub a0, a1, a0 ; RV64IFD-NEXT: ret +; +; RV64IBT-LABEL: select_const_int_one_away: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: addi a1, zero, 4 +; RV64IBT-NEXT: sub a0, a1, a0 +; RV64IBT-NEXT: ret +; +; RV64IFDBT-LABEL: select_const_int_one_away: +; RV64IFDBT: # %bb.0: +; RV64IFDBT-NEXT: addi a1, zero, 4 +; RV64IFDBT-NEXT: sub a0, a1, a0 +; RV64IFDBT-NEXT: ret %1 = select i1 %a, i32 3, i32 4 ret i32 %1 } @@ -75,6 +123,16 @@ define signext i32 @select_const_int_pow2_zero(i1 zeroext %a) nounwind { ; RV32IF-NEXT: slli a0, a0, 2 ; RV32IF-NEXT: ret ; +; RV32IBT-LABEL: select_const_int_pow2_zero: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: slli a0, a0, 2 +; RV32IBT-NEXT: ret +; +; RV32IFBT-LABEL: select_const_int_pow2_zero: +; RV32IFBT: # %bb.0: +; RV32IFBT-NEXT: slli a0, a0, 2 +; RV32IFBT-NEXT: ret +; ; RV64I-LABEL: select_const_int_pow2_zero: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 2 @@ -84,6 +142,16 @@ define signext i32 @select_const_int_pow2_zero(i1 zeroext %a) nounwind { ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: slli a0, a0, 2 ; RV64IFD-NEXT: ret +; +; RV64IBT-LABEL: select_const_int_pow2_zero: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: slli a0, a0, 2 +; RV64IBT-NEXT: ret +; +; RV64IFDBT-LABEL: select_const_int_pow2_zero: +; RV64IFDBT: # %bb.0: +; RV64IFDBT-NEXT: slli a0, a0, 2 +; RV64IFDBT-NEXT: ret %1 = select i1 %a, i32 4, i32 0 ret i32 %1 } @@ -109,6 +177,26 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind { ; RV32IF-NEXT: .LBB3_2: ; RV32IF-NEXT: ret ; +; RV32IBT-LABEL: select_const_int_harder: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: mv a1, a0 +; RV32IBT-NEXT: addi a0, zero, 6 +; RV32IBT-NEXT: bnez a1, .LBB3_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: addi a0, zero, 38 +; RV32IBT-NEXT: .LBB3_2: +; RV32IBT-NEXT: ret +; +; RV32IFBT-LABEL: select_const_int_harder: +; RV32IFBT: # %bb.0: +; RV32IFBT-NEXT: mv a1, a0 +; RV32IFBT-NEXT: addi a0, zero, 6 +; RV32IFBT-NEXT: bnez a1, .LBB3_2 +; RV32IFBT-NEXT: # %bb.1: +; RV32IFBT-NEXT: addi a0, zero, 38 +; RV32IFBT-NEXT: .LBB3_2: +; RV32IFBT-NEXT: ret +; ; RV64I-LABEL: select_const_int_harder: ; RV64I: # %bb.0: ; RV64I-NEXT: mv a1, a0 @@ -128,6 +216,26 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind { ; RV64IFD-NEXT: addi a0, zero, 38 ; RV64IFD-NEXT: .LBB3_2: ; RV64IFD-NEXT: ret +; +; RV64IBT-LABEL: select_const_int_harder: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: mv a1, a0 +; RV64IBT-NEXT: addi a0, zero, 6 +; RV64IBT-NEXT: bnez a1, .LBB3_2 +; RV64IBT-NEXT: # %bb.1: +; RV64IBT-NEXT: addi a0, zero, 38 +; RV64IBT-NEXT: .LBB3_2: +; RV64IBT-NEXT: ret +; +; RV64IFDBT-LABEL: select_const_int_harder: +; RV64IFDBT: # %bb.0: +; RV64IFDBT-NEXT: mv a1, a0 +; RV64IFDBT-NEXT: addi a0, zero, 6 +; RV64IFDBT-NEXT: bnez a1, .LBB3_2 +; RV64IFDBT-NEXT: # %bb.1: +; RV64IFDBT-NEXT: addi a0, zero, 38 +; RV64IFDBT-NEXT: .LBB3_2: +; RV64IFDBT-NEXT: ret %1 = select i1 %a, i32 6, i32 38 ret i32 %1 } @@ -157,6 +265,30 @@ define float @select_const_fp(i1 zeroext %a) nounwind { ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret ; +; RV32IBT-LABEL: select_const_fp: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: mv a1, a0 +; RV32IBT-NEXT: lui a0, 263168 +; RV32IBT-NEXT: bnez a1, .LBB4_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: lui a0, 264192 +; RV32IBT-NEXT: .LBB4_2: +; RV32IBT-NEXT: ret +; +; RV32IFBT-LABEL: select_const_fp: +; RV32IFBT: # %bb.0: +; RV32IFBT-NEXT: bnez a0, .LBB4_2 +; RV32IFBT-NEXT: # %bb.1: +; RV32IFBT-NEXT: lui a0, %hi(.LCPI4_0) +; RV32IFBT-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; RV32IFBT-NEXT: fmv.x.w a0, ft0 +; RV32IFBT-NEXT: ret +; RV32IFBT-NEXT: .LBB4_2: +; RV32IFBT-NEXT: lui a0, %hi(.LCPI4_1) +; RV32IFBT-NEXT: flw ft0, %lo(.LCPI4_1)(a0) +; RV32IFBT-NEXT: fmv.x.w a0, ft0 +; RV32IFBT-NEXT: ret +; ; RV64I-LABEL: select_const_fp: ; RV64I: # %bb.0: ; RV64I-NEXT: mv a1, a0 @@ -180,6 +312,30 @@ define float @select_const_fp(i1 zeroext %a) nounwind { ; RV64IFD-NEXT: flw ft0, %lo(.LCPI4_1)(a0) ; RV64IFD-NEXT: fmv.x.w a0, ft0 ; RV64IFD-NEXT: ret +; +; RV64IBT-LABEL: select_const_fp: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: mv a1, a0 +; RV64IBT-NEXT: lui a0, 263168 +; RV64IBT-NEXT: bnez a1, .LBB4_2 +; RV64IBT-NEXT: # %bb.1: +; RV64IBT-NEXT: lui a0, 264192 +; RV64IBT-NEXT: .LBB4_2: +; RV64IBT-NEXT: ret +; +; RV64IFDBT-LABEL: select_const_fp: +; RV64IFDBT: # %bb.0: +; RV64IFDBT-NEXT: bnez a0, .LBB4_2 +; RV64IFDBT-NEXT: # %bb.1: +; RV64IFDBT-NEXT: lui a0, %hi(.LCPI4_0) +; RV64IFDBT-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; RV64IFDBT-NEXT: fmv.x.w a0, ft0 +; RV64IFDBT-NEXT: ret +; RV64IFDBT-NEXT: .LBB4_2: +; RV64IFDBT-NEXT: lui a0, %hi(.LCPI4_1) +; RV64IFDBT-NEXT: flw ft0, %lo(.LCPI4_1)(a0) +; RV64IFDBT-NEXT: fmv.x.w a0, ft0 +; RV64IFDBT-NEXT: ret %1 = select i1 %a, float 3.0, float 4.0 ret float %1 } diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll index 5dbaa90..35c1dd5 100644 --- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll +++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBT ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBT ; Selects of wide values are split into two selects, which can easily cause ; unnecessary control flow. Here we check some cases where we can currently @@ -21,6 +25,18 @@ define i64 @cmovcc64(i32 signext %a, i64 %b, i64 %c) nounwind { ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmovcc64: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: addi a5, zero, 123 +; RV32IBT-NEXT: beq a0, a5, .LBB0_2 +; RV32IBT-NEXT: # %bb.1: # %entry +; RV32IBT-NEXT: mv a1, a3 +; RV32IBT-NEXT: mv a2, a4 +; RV32IBT-NEXT: .LBB0_2: # %entry +; RV32IBT-NEXT: mv a0, a1 +; RV32IBT-NEXT: mv a1, a2 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmovcc64: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: addi a3, zero, 123 @@ -30,6 +46,16 @@ define i64 @cmovcc64(i32 signext %a, i64 %b, i64 %c) nounwind { ; RV64I-NEXT: .LBB0_2: # %entry ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmovcc64: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: addi a3, zero, 123 +; RV64IBT-NEXT: beq a0, a3, .LBB0_2 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: mv a1, a2 +; RV64IBT-NEXT: .LBB0_2: # %entry +; RV64IBT-NEXT: mv a0, a1 +; RV64IBT-NEXT: ret entry: %cmp = icmp eq i32 %a, 123 %cond = select i1 %cmp, i64 %b, i64 %c @@ -76,6 +102,30 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind { ; RV32I-NEXT: sw a6, 0(a0) ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmovcc128: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: addi a6, a3, 12 +; RV32IBT-NEXT: addi a7, a4, 12 +; RV32IBT-NEXT: addi t0, a3, 8 +; RV32IBT-NEXT: addi t1, a4, 8 +; RV32IBT-NEXT: addi t2, a3, 4 +; RV32IBT-NEXT: addi a5, a4, 4 +; RV32IBT-NEXT: xori a1, a1, 123 +; RV32IBT-NEXT: or a1, a1, a2 +; RV32IBT-NEXT: cmov a2, a1, a4, a3 +; RV32IBT-NEXT: cmov a3, a1, a5, t2 +; RV32IBT-NEXT: cmov a4, a1, t1, t0 +; RV32IBT-NEXT: cmov a1, a1, a7, a6 +; RV32IBT-NEXT: lw a1, 0(a1) +; RV32IBT-NEXT: lw a4, 0(a4) +; RV32IBT-NEXT: lw a3, 0(a3) +; RV32IBT-NEXT: lw a2, 0(a2) +; RV32IBT-NEXT: sw a1, 12(a0) +; RV32IBT-NEXT: sw a4, 8(a0) +; RV32IBT-NEXT: sw a3, 4(a0) +; RV32IBT-NEXT: sw a2, 0(a0) +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmovcc128: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: addi a5, zero, 123 @@ -87,6 +137,18 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind { ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: mv a1, a2 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmovcc128: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: addi a5, zero, 123 +; RV64IBT-NEXT: beq a0, a5, .LBB1_2 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: mv a1, a3 +; RV64IBT-NEXT: mv a2, a4 +; RV64IBT-NEXT: .LBB1_2: # %entry +; RV64IBT-NEXT: mv a0, a1 +; RV64IBT-NEXT: mv a1, a2 +; RV64IBT-NEXT: ret entry: %cmp = icmp eq i64 %a, 123 %cond = select i1 %cmp, i128 %b, i128 %c @@ -106,6 +168,18 @@ define i64 @cmov64(i1 %a, i64 %b, i64 %c) nounwind { ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmov64: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: andi a5, a0, 1 +; RV32IBT-NEXT: mv a0, a1 +; RV32IBT-NEXT: bnez a5, .LBB2_2 +; RV32IBT-NEXT: # %bb.1: # %entry +; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: mv a2, a4 +; RV32IBT-NEXT: .LBB2_2: # %entry +; RV32IBT-NEXT: mv a1, a2 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmov64: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: andi a3, a0, 1 @@ -115,6 +189,16 @@ define i64 @cmov64(i1 %a, i64 %b, i64 %c) nounwind { ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: .LBB2_2: # %entry ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmov64: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: andi a3, a0, 1 +; RV64IBT-NEXT: mv a0, a1 +; RV64IBT-NEXT: bnez a3, .LBB2_2 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: mv a0, a2 +; RV64IBT-NEXT: .LBB2_2: # %entry +; RV64IBT-NEXT: ret entry: %cond = select i1 %a, i64 %b, i64 %c ret i64 %cond @@ -159,6 +243,44 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { ; RV32I-NEXT: sw a6, 0(a0) ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmov128: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: andi a1, a1, 1 +; RV32IBT-NEXT: mv a4, a2 +; RV32IBT-NEXT: bnez a1, .LBB3_2 +; RV32IBT-NEXT: # %bb.1: # %entry +; RV32IBT-NEXT: mv a4, a3 +; RV32IBT-NEXT: .LBB3_2: # %entry +; RV32IBT-NEXT: bnez a1, .LBB3_5 +; RV32IBT-NEXT: # %bb.3: # %entry +; RV32IBT-NEXT: addi a7, a3, 4 +; RV32IBT-NEXT: beqz a1, .LBB3_6 +; RV32IBT-NEXT: .LBB3_4: +; RV32IBT-NEXT: addi a5, a2, 8 +; RV32IBT-NEXT: j .LBB3_7 +; RV32IBT-NEXT: .LBB3_5: +; RV32IBT-NEXT: addi a7, a2, 4 +; RV32IBT-NEXT: bnez a1, .LBB3_4 +; RV32IBT-NEXT: .LBB3_6: # %entry +; RV32IBT-NEXT: addi a5, a3, 8 +; RV32IBT-NEXT: .LBB3_7: # %entry +; RV32IBT-NEXT: lw a6, 0(a4) +; RV32IBT-NEXT: lw a7, 0(a7) +; RV32IBT-NEXT: lw a4, 0(a5) +; RV32IBT-NEXT: bnez a1, .LBB3_9 +; RV32IBT-NEXT: # %bb.8: # %entry +; RV32IBT-NEXT: addi a1, a3, 12 +; RV32IBT-NEXT: j .LBB3_10 +; RV32IBT-NEXT: .LBB3_9: +; RV32IBT-NEXT: addi a1, a2, 12 +; RV32IBT-NEXT: .LBB3_10: # %entry +; RV32IBT-NEXT: lw a1, 0(a1) +; RV32IBT-NEXT: sw a1, 12(a0) +; RV32IBT-NEXT: sw a4, 8(a0) +; RV32IBT-NEXT: sw a7, 4(a0) +; RV32IBT-NEXT: sw a6, 0(a0) +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmov128: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: andi a5, a0, 1 @@ -170,6 +292,18 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { ; RV64I-NEXT: .LBB3_2: # %entry ; RV64I-NEXT: mv a1, a2 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmov128: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: andi a5, a0, 1 +; RV64IBT-NEXT: mv a0, a1 +; RV64IBT-NEXT: bnez a5, .LBB3_2 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: mv a2, a4 +; RV64IBT-NEXT: .LBB3_2: # %entry +; RV64IBT-NEXT: mv a1, a2 +; RV64IBT-NEXT: ret entry: %cond = select i1 %a, i128 %b, i128 %c ret i128 %cond @@ -192,6 +326,22 @@ define float @cmovfloat(i1 %a, float %b, float %c, float %d, float %e) nounwind ; RV32I-NEXT: fmv.x.w a0, ft0 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmovfloat: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: andi a0, a0, 1 +; RV32IBT-NEXT: bnez a0, .LBB4_2 +; RV32IBT-NEXT: # %bb.1: # %entry +; RV32IBT-NEXT: fmv.w.x ft0, a4 +; RV32IBT-NEXT: fmv.w.x ft1, a2 +; RV32IBT-NEXT: j .LBB4_3 +; RV32IBT-NEXT: .LBB4_2: +; RV32IBT-NEXT: fmv.w.x ft0, a3 +; RV32IBT-NEXT: fmv.w.x ft1, a1 +; RV32IBT-NEXT: .LBB4_3: # %entry +; RV32IBT-NEXT: fadd.s ft0, ft1, ft0 +; RV32IBT-NEXT: fmv.x.w a0, ft0 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmovfloat: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: andi a0, a0, 1 @@ -207,6 +357,22 @@ define float @cmovfloat(i1 %a, float %b, float %c, float %d, float %e) nounwind ; RV64I-NEXT: fadd.s ft0, ft1, ft0 ; RV64I-NEXT: fmv.x.w a0, ft0 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmovfloat: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: andi a0, a0, 1 +; RV64IBT-NEXT: bnez a0, .LBB4_2 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: fmv.w.x ft0, a4 +; RV64IBT-NEXT: fmv.w.x ft1, a2 +; RV64IBT-NEXT: j .LBB4_3 +; RV64IBT-NEXT: .LBB4_2: +; RV64IBT-NEXT: fmv.w.x ft0, a3 +; RV64IBT-NEXT: fmv.w.x ft1, a1 +; RV64IBT-NEXT: .LBB4_3: # %entry +; RV64IBT-NEXT: fadd.s ft0, ft1, ft0 +; RV64IBT-NEXT: fmv.x.w a0, ft0 +; RV64IBT-NEXT: ret entry: %cond1 = select i1 %a, float %b, float %c %cond2 = select i1 %a, float %d, float %e @@ -234,6 +400,25 @@ define double @cmovdouble(i1 %a, double %b, double %c) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmovdouble: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: addi sp, sp, -16 +; RV32IBT-NEXT: sw a3, 8(sp) +; RV32IBT-NEXT: sw a4, 12(sp) +; RV32IBT-NEXT: fld ft0, 8(sp) +; RV32IBT-NEXT: sw a1, 8(sp) +; RV32IBT-NEXT: andi a0, a0, 1 +; RV32IBT-NEXT: sw a2, 12(sp) +; RV32IBT-NEXT: beqz a0, .LBB5_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: fld ft0, 8(sp) +; RV32IBT-NEXT: .LBB5_2: # %entry +; RV32IBT-NEXT: fsd ft0, 8(sp) +; RV32IBT-NEXT: lw a0, 8(sp) +; RV32IBT-NEXT: lw a1, 12(sp) +; RV32IBT-NEXT: addi sp, sp, 16 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmovdouble: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: andi a0, a0, 1 @@ -246,6 +431,19 @@ define double @cmovdouble(i1 %a, double %b, double %c) nounwind { ; RV64I-NEXT: fmv.d.x ft0, a1 ; RV64I-NEXT: fmv.x.d a0, ft0 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmovdouble: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: andi a0, a0, 1 +; RV64IBT-NEXT: bnez a0, .LBB5_2 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: fmv.d.x ft0, a2 +; RV64IBT-NEXT: fmv.x.d a0, ft0 +; RV64IBT-NEXT: ret +; RV64IBT-NEXT: .LBB5_2: +; RV64IBT-NEXT: fmv.d.x ft0, a1 +; RV64IBT-NEXT: fmv.x.d a0, ft0 +; RV64IBT-NEXT: ret entry: %cond = select i1 %a, double %b, double %c ret double %cond @@ -274,6 +472,25 @@ define i32 @cmovccdep(i32 signext %a, i32 %b, i32 %c, i32 %d) nounwind { ; RV32I-NEXT: add a0, a1, a2 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmovccdep: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: addi a4, zero, 123 +; RV32IBT-NEXT: bne a0, a4, .LBB6_3 +; RV32IBT-NEXT: # %bb.1: # %entry +; RV32IBT-NEXT: mv a2, a1 +; RV32IBT-NEXT: bne a0, a4, .LBB6_4 +; RV32IBT-NEXT: .LBB6_2: # %entry +; RV32IBT-NEXT: add a0, a1, a2 +; RV32IBT-NEXT: ret +; RV32IBT-NEXT: .LBB6_3: # %entry +; RV32IBT-NEXT: mv a1, a2 +; RV32IBT-NEXT: mv a2, a1 +; RV32IBT-NEXT: beq a0, a4, .LBB6_2 +; RV32IBT-NEXT: .LBB6_4: # %entry +; RV32IBT-NEXT: mv a2, a3 +; RV32IBT-NEXT: add a0, a1, a2 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmovccdep: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: addi a4, zero, 123 @@ -292,6 +509,25 @@ define i32 @cmovccdep(i32 signext %a, i32 %b, i32 %c, i32 %d) nounwind { ; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: addw a0, a1, a2 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmovccdep: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: addi a4, zero, 123 +; RV64IBT-NEXT: bne a0, a4, .LBB6_3 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: mv a2, a1 +; RV64IBT-NEXT: bne a0, a4, .LBB6_4 +; RV64IBT-NEXT: .LBB6_2: # %entry +; RV64IBT-NEXT: addw a0, a1, a2 +; RV64IBT-NEXT: ret +; RV64IBT-NEXT: .LBB6_3: # %entry +; RV64IBT-NEXT: mv a1, a2 +; RV64IBT-NEXT: mv a2, a1 +; RV64IBT-NEXT: beq a0, a4, .LBB6_2 +; RV64IBT-NEXT: .LBB6_4: # %entry +; RV64IBT-NEXT: mv a2, a3 +; RV64IBT-NEXT: addw a0, a1, a2 +; RV64IBT-NEXT: ret entry: %cmp = icmp eq i32 %a, 123 %cond1 = select i1 %cmp, i32 %b, i32 %c @@ -321,6 +557,24 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; RV32I-NEXT: add a0, a2, a4 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: cmovdiffcc: +; RV32IBT: # %bb.0: # %entry +; RV32IBT-NEXT: andi a0, a0, 1 +; RV32IBT-NEXT: andi a1, a1, 1 +; RV32IBT-NEXT: beqz a0, .LBB7_3 +; RV32IBT-NEXT: # %bb.1: # %entry +; RV32IBT-NEXT: beqz a1, .LBB7_4 +; RV32IBT-NEXT: .LBB7_2: # %entry +; RV32IBT-NEXT: add a0, a2, a4 +; RV32IBT-NEXT: ret +; RV32IBT-NEXT: .LBB7_3: # %entry +; RV32IBT-NEXT: mv a2, a3 +; RV32IBT-NEXT: bnez a1, .LBB7_2 +; RV32IBT-NEXT: .LBB7_4: # %entry +; RV32IBT-NEXT: mv a4, a5 +; RV32IBT-NEXT: add a0, a2, a4 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: cmovdiffcc: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: andi a0, a0, 1 @@ -338,6 +592,24 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; RV64I-NEXT: mv a4, a5 ; RV64I-NEXT: addw a0, a2, a4 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: cmovdiffcc: +; RV64IBT: # %bb.0: # %entry +; RV64IBT-NEXT: andi a0, a0, 1 +; RV64IBT-NEXT: andi a1, a1, 1 +; RV64IBT-NEXT: beqz a0, .LBB7_3 +; RV64IBT-NEXT: # %bb.1: # %entry +; RV64IBT-NEXT: beqz a1, .LBB7_4 +; RV64IBT-NEXT: .LBB7_2: # %entry +; RV64IBT-NEXT: addw a0, a2, a4 +; RV64IBT-NEXT: ret +; RV64IBT-NEXT: .LBB7_3: # %entry +; RV64IBT-NEXT: mv a2, a3 +; RV64IBT-NEXT: bnez a1, .LBB7_2 +; RV64IBT-NEXT: .LBB7_4: # %entry +; RV64IBT-NEXT: mv a4, a5 +; RV64IBT-NEXT: addw a0, a2, a4 +; RV64IBT-NEXT: ret entry: %cond1 = select i1 %a, i32 %c, i32 %d %cond2 = select i1 %b, i32 %e, i32 %f diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.mir b/llvm/test/CodeGen/RISCV/select-optimize-multiple.mir index d7575b8..069b854 100644 --- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.mir +++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.mir @@ -1,8 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv32 -run-pass=finalize-isel -simplify-mir -o - %s \ # RUN: | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -run-pass=finalize-isel -simplify-mir -o - %s \ +# RUN: | FileCheck -check-prefix=RV32IBT %s # RUN: llc -mtriple=riscv64 -run-pass=finalize-isel -simplify-mir -o - %s \ # RUN: | FileCheck -check-prefix=RV64I %s +# RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -run-pass=finalize-isel -simplify-mir -o - %s \ +# RUN: | FileCheck -check-prefix=RV64IBT %s # Provide dummy definitions of functions and just enough metadata to create a # DBG_VALUE. @@ -64,6 +68,28 @@ body: | ; RV32I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] ; RV32I: $x10 = COPY [[ADD]] ; RV32I: PseudoRET implicit $x10 + ; RV32IBT-LABEL: name: cmov_interleaved_bad + ; RV32IBT: successors: %bb.1, %bb.2 + ; RV32IBT: liveins: $x10, $x11, $x12, $x13 + ; RV32IBT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV32IBT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV32IBT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV32IBT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV32IBT: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV32IBT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV32IBT: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV32IBT: .1: + ; RV32IBT: .2: + ; RV32IBT: successors: %bb.3, %bb.4 + ; RV32IBT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV32IBT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1 + ; RV32IBT: BNE [[ANDI]], [[COPY4]], %bb.4 + ; RV32IBT: .3: + ; RV32IBT: .4: + ; RV32IBT: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.2, [[COPY1]], %bb.3 + ; RV32IBT: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] + ; RV32IBT: $x10 = COPY [[ADD]] + ; RV32IBT: PseudoRET implicit $x10 ; RV64I-LABEL: name: cmov_interleaved_bad ; RV64I: successors: %bb.1, %bb.2 ; RV64I: liveins: $x10, $x11, $x12, $x13 @@ -86,6 +112,28 @@ body: | ; RV64I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] ; RV64I: $x10 = COPY [[ADD]] ; RV64I: PseudoRET implicit $x10 + ; RV64IBT-LABEL: name: cmov_interleaved_bad + ; RV64IBT: successors: %bb.1, %bb.2 + ; RV64IBT: liveins: $x10, $x11, $x12, $x13 + ; RV64IBT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV64IBT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV64IBT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV64IBT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV64IBT: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV64IBT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV64IBT: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV64IBT: .1: + ; RV64IBT: .2: + ; RV64IBT: successors: %bb.3, %bb.4 + ; RV64IBT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV64IBT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1 + ; RV64IBT: BNE [[ANDI]], [[COPY4]], %bb.4 + ; RV64IBT: .3: + ; RV64IBT: .4: + ; RV64IBT: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.2, [[COPY1]], %bb.3 + ; RV64IBT: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] + ; RV64IBT: $x10 = COPY [[ADD]] + ; RV64IBT: PseudoRET implicit $x10 %3:gpr = COPY $x13 %2:gpr = COPY $x12 %1:gpr = COPY $x11 @@ -149,6 +197,27 @@ body: | ; RV32I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] ; RV32I: $x10 = COPY [[ADD]] ; RV32I: PseudoRET implicit $x10 + ; RV32IBT-LABEL: name: cmov_interleaved_debug_value + ; RV32IBT: successors: %bb.1, %bb.2 + ; RV32IBT: liveins: $x10, $x11, $x12, $x13 + ; RV32IBT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV32IBT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV32IBT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV32IBT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV32IBT: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV32IBT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV32IBT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY3]], 1 + ; RV32IBT: DBG_VALUE [[ADDI]], $noreg + ; RV32IBT: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV32IBT: .1: + ; RV32IBT: .2: + ; RV32IBT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV32IBT: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 + ; RV32IBT: DBG_VALUE [[PHI]], $noreg + ; RV32IBT: DBG_VALUE [[PHI1]], $noreg + ; RV32IBT: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] + ; RV32IBT: $x10 = COPY [[ADD]] + ; RV32IBT: PseudoRET implicit $x10 ; RV64I-LABEL: name: cmov_interleaved_debug_value ; RV64I: successors: %bb.1, %bb.2 ; RV64I: liveins: $x10, $x11, $x12, $x13 @@ -170,6 +239,27 @@ body: | ; RV64I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] ; RV64I: $x10 = COPY [[ADD]] ; RV64I: PseudoRET implicit $x10 + ; RV64IBT-LABEL: name: cmov_interleaved_debug_value + ; RV64IBT: successors: %bb.1, %bb.2 + ; RV64IBT: liveins: $x10, $x11, $x12, $x13 + ; RV64IBT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV64IBT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV64IBT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV64IBT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV64IBT: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV64IBT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV64IBT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY3]], 1 + ; RV64IBT: DBG_VALUE [[ADDI]], $noreg + ; RV64IBT: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV64IBT: .1: + ; RV64IBT: .2: + ; RV64IBT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV64IBT: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 + ; RV64IBT: DBG_VALUE [[PHI]], $noreg + ; RV64IBT: DBG_VALUE [[PHI1]], $noreg + ; RV64IBT: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] + ; RV64IBT: $x10 = COPY [[ADD]] + ; RV64IBT: PseudoRET implicit $x10 %3:gpr = COPY $x13 %2:gpr = COPY $x12 %1:gpr = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/select-or.ll b/llvm/test/CodeGen/RISCV/select-or.ll index 768eb7e..4ae6b77 100644 --- a/llvm/test/CodeGen/RISCV/select-or.ll +++ b/llvm/test/CodeGen/RISCV/select-or.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IBT %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IBT %s ;; There are a few different ways to lower (select (or A, B), X, Y). This test ;; ensures that we do so with as few branches as possible. @@ -18,6 +22,16 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: select_of_or: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: or a1, a0, a1 +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: bnez a1, .LBB0_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: mv a0, a3 +; RV32IBT-NEXT: .LBB0_2: +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: select_of_or: ; RV64I: # %bb.0: ; RV64I-NEXT: or a1, a0, a1 @@ -27,6 +41,16 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i ; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: select_of_or: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: or a1, a0, a1 +; RV64IBT-NEXT: mv a0, a2 +; RV64IBT-NEXT: bnez a1, .LBB0_2 +; RV64IBT-NEXT: # %bb.1: +; RV64IBT-NEXT: mv a0, a3 +; RV64IBT-NEXT: .LBB0_2: +; RV64IBT-NEXT: ret %1 = or i1 %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -53,6 +77,23 @@ define signext i32 @if_of_or(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; +; RV32IBT-LABEL: if_of_or: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: addi sp, sp, -16 +; RV32IBT-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IBT-NEXT: or a0, a0, a1 +; RV32IBT-NEXT: addi a1, zero, 1 +; RV32IBT-NEXT: bne a0, a1, .LBB1_2 +; RV32IBT-NEXT: # %bb.1: # %if.then +; RV32IBT-NEXT: call either@plt +; RV32IBT-NEXT: j .LBB1_3 +; RV32IBT-NEXT: .LBB1_2: # %if.else +; RV32IBT-NEXT: call neither@plt +; RV32IBT-NEXT: .LBB1_3: # %if.end +; RV32IBT-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IBT-NEXT: addi sp, sp, 16 +; RV32IBT-NEXT: ret +; ; RV64I-LABEL: if_of_or: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -69,6 +110,23 @@ define signext i32 @if_of_or(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64IBT-LABEL: if_of_or: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: addi sp, sp, -16 +; RV64IBT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IBT-NEXT: or a0, a0, a1 +; RV64IBT-NEXT: addi a1, zero, 1 +; RV64IBT-NEXT: bne a0, a1, .LBB1_2 +; RV64IBT-NEXT: # %bb.1: # %if.then +; RV64IBT-NEXT: call either@plt +; RV64IBT-NEXT: j .LBB1_3 +; RV64IBT-NEXT: .LBB1_2: # %if.else +; RV64IBT-NEXT: call neither@plt +; RV64IBT-NEXT: .LBB1_3: # %if.end +; RV64IBT-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IBT-NEXT: addi sp, sp, 16 +; RV64IBT-NEXT: ret %1 = or i1 %a, %b br i1 %1, label %if.then, label %if.else -- 2.7.4