From eb67bd8d745ef6541605ff2558c18dd551f59ef1 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 25 Nov 2014 04:16:15 +0000 Subject: [PATCH] [FastISel][AArch64] Fix and extend the tbz/tbnz pattern matching. The pattern matching failed to recognize all instances of "-1", because when comparing against "-1" we didn't use an APInt of the same bitwidth. This commit fixes this and also adds inverse versions of the condition to catch more cases. llvm-svn: 222722 --- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 39 +++---- llvm/test/CodeGen/AArch64/fast-isel-tbz.ll | 158 +++++++++++++++++++++++++++- 2 files changed, 176 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 612cb00..fb0326b 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2112,15 +2112,15 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { int TestBit = -1; bool IsCmpNE; - if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) { - if (const auto *C = dyn_cast(LHS)) - if (C->isNullValue()) - std::swap(LHS, RHS); - - if (!isa(RHS)) - return false; + switch (Predicate) { + default: + return false; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + if (isa(LHS) && cast(LHS)->isNullValue()) + std::swap(LHS, RHS); - if (!cast(RHS)->isNullValue()) + if (!isa(RHS) || !cast(RHS)->isNullValue()) return false; if (const auto *AI = dyn_cast(LHS)) @@ -2143,26 +2143,27 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { TestBit = 0; IsCmpNE = Predicate == CmpInst::ICMP_NE; - } else if (Predicate == CmpInst::ICMP_SLT) { - if (!isa(RHS)) - return false; - - if (!cast(RHS)->isNullValue()) + break; + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SGE: + if (!isa(RHS) || !cast(RHS)->isNullValue()) return false; TestBit = BW - 1; - IsCmpNE = true; - } else if (Predicate == CmpInst::ICMP_SGT) { + IsCmpNE = Predicate == 
CmpInst::ICMP_SLT; + break; + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_SLE: if (!isa(RHS)) return false; - if (cast(RHS)->getValue() != -1) + if (cast(RHS)->getValue() != APInt(BW, -1, true)) return false; TestBit = BW - 1; - IsCmpNE = false; - } else - return false; + IsCmpNE = Predicate == CmpInst::ICMP_SLE; + break; + } // end switch static const unsigned OpcTable[2][2][2] = { { {AArch64::CBZW, AArch64::CBZX }, diff --git a/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll b/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll index d7f46b2..67e81b4 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-tbz.ll @@ -1,5 +1,5 @@ -; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s -; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s +; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s define i32 @icmp_eq_i8(i8 zeroext %a) { ; CHECK-LABEL: icmp_eq_i8 @@ -121,6 +121,160 @@ bb2: ret i32 0 } +define i32 @icmp_slt_i8(i8 zeroext %a) { +; FAST-LABEL: icmp_slt_i8 +; FAST: tbnz w0, #7, {{LBB.+_2}} + %1 = icmp slt i8 %a, 0 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_slt_i16(i16 zeroext %a) { +; FAST-LABEL: icmp_slt_i16 +; FAST: tbnz w0, #15, {{LBB.+_2}} + %1 = icmp slt i16 %a, 0 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_slt_i32(i32 %a) { +; CHECK-LABEL: icmp_slt_i32 +; CHECK: tbnz w0, #31, {{LBB.+_2}} + %1 = icmp slt i32 %a, 0 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 
@icmp_slt_i64(i64 %a) { +; CHECK-LABEL: icmp_slt_i64 +; CHECK: tbnz x0, #63, {{LBB.+_2}} + %1 = icmp slt i64 %a, 0 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sge_i8(i8 zeroext %a) { +; FAST-LABEL: icmp_sge_i8 +; FAST: tbz w0, #7, {{LBB.+_2}} + %1 = icmp sge i8 %a, 0 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sge_i16(i16 zeroext %a) { +; FAST-LABEL: icmp_sge_i16 +; FAST: tbz w0, #15, {{LBB.+_2}} + %1 = icmp sge i16 %a, 0 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sle_i8(i8 zeroext %a) { +; FAST-LABEL: icmp_sle_i8 +; FAST: tbnz w0, #7, {{LBB.+_2}} + %1 = icmp sle i8 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sle_i16(i16 zeroext %a) { +; FAST-LABEL: icmp_sle_i16 +; FAST: tbnz w0, #15, {{LBB.+_2}} + %1 = icmp sle i16 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sle_i32(i32 %a) { +; CHECK-LABEL: icmp_sle_i32 +; CHECK: tbnz w0, #31, {{LBB.+_2}} + %1 = icmp sle i32 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sle_i64(i64 %a) { +; CHECK-LABEL: icmp_sle_i64 +; CHECK: tbnz x0, #63, {{LBB.+_2}} + %1 = icmp sle i64 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sgt_i8(i8 zeroext %a) { +; FAST-LABEL: icmp_sgt_i8 +; FAST: tbz w0, #7, {{LBB.+_2}} + %1 = icmp sgt i8 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sgt_i16(i16 zeroext %a) { +; FAST-LABEL: icmp_sgt_i16 +; FAST: tbz w0, #15, {{LBB.+_2}} + %1 = icmp sgt i16 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sgt_i32(i32 %a) { +; CHECK-LABEL: icmp_sgt_i32 +; 
CHECK: tbz w0, #31, {{LBB.+_2}} + %1 = icmp sgt i32 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + +define i32 @icmp_sgt_i64(i64 %a) { +; FAST-LABEL: icmp_sgt_i64 +; FAST: tbz x0, #63, {{LBB.+_2}} + %1 = icmp sgt i64 %a, -1 + br i1 %1, label %bb1, label %bb2, !prof !0 +bb1: + ret i32 1 +bb2: + ret i32 0 +} + ; Test that we don't fold the 'and' instruction into the compare. define i32 @icmp_eq_and_i32(i32 %a, i1 %c) { ; CHECK-LABEL: icmp_eq_and_i32 -- 2.7.4