From: Nicholas Guy
Date: Mon, 16 Aug 2021 13:10:21 +0000 (+0100)
Subject: [AArch64] Generate SMOV in place of sext(fmov(...))
X-Git-Tag: upstream/15.0.7~33056
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=36fcf47fc80dfdd50243f1d5a8871282aa5c4ffa;p=platform%2Fupstream%2Fllvm.git

[AArch64] Generate SMOV in place of sext(fmov(...))

A single smov instruction is capable of moving from a vector register while
performing the sign-extend during said move, rather than each step being
performed by separate instructions.

Differential Revision: https://reviews.llvm.org/D108633
---

NOTE(review): this patch text was recovered from a whitespace-collapsed,
markup-stripped copy. The scalable vector types in the test
(<vscale x 16 x i8>, <vscale x 8 x i16>, <vscale x 4 x i32>) were restored
from the sext element types and the nxv16i8/nxv8i16/nxv4i32 patterns below.
Leading indentation of the .td lines is reconstructed and should be confirmed
against the upstream commit before applying. The author e-mail address was
stripped and has not been reinvented.

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 96ad071..1d8e3f9 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2459,6 +2459,19 @@ let Predicates = [HasSVEorStreamingSVE] in {
               (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
   }
 
+  def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
+            (i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+  def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
+            (i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+
+  def : Pat<(sext_inreg (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index), i16),
+            (i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+  def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16),
+            (i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+
+  def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+            (i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+
   // Extract first element from vector.
   let AddedComplexity = 2 in {
   def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
@@ -2492,6 +2505,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
             (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_3:$index)>;
   def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_1 i32:$index)))),
             (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_1:$index)>;
+
 } // End HasSVEorStreamingSVE
 
 let Predicates = [HasSVE, HasMatMulInt8] in {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smov-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-smov-gen.ll
new file mode 100644
index 0000000..ee27a1b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-smov-gen.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @i8_i32(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smov w0, v0.b[15]
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 16 x i8> %a, i32 15
+  %conv = sext i8 %elt to i32
+  ret i32 %conv
+}
+
+define i64 @i8_i64(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smov x0, v0.b[15]
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 16 x i8> %a, i32 15
+  %conv = sext i8 %elt to i64
+  ret i64 %conv
+}
+
+define i32 @i16_i32(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smov w0, v0.h[7]
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 8 x i16> %a, i32 7
+  %conv = sext i16 %elt to i32
+  ret i32 %conv
+}
+
+define i64 @i16_i64(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smov x0, v0.h[7]
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 8 x i16> %a, i32 7
+  %conv = sext i16 %elt to i64
+  ret i64 %conv
+}
+
+define i64 @i32_i64(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smov x0, v0.s[3]
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 4 x i32> %a, i32 3
+  %conv = sext i32 %elt to i64
+  ret i64 %conv
+}
+
+; NOTE: Testing out-of-range indices
+
+define i32 @i8_i32_oor(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: i8_i32_oor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, z0.b[16]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    sxtb w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 16 x i8> %a, i32 16
+  %conv = sext i8 %elt to i32
+  ret i32 %conv
+}
+
+define i64 @i8_i64_oor(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: i8_i64_oor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.b, z0.b[16]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    sxtb x0, w8
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 16 x i8> %a, i32 16
+  %conv = sext i8 %elt to i64
+  ret i64 %conv
+}
+
+define i32 @i16_i32_oor(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: i16_i32_oor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 8 x i16> %a, i32 8
+  %conv = sext i16 %elt to i32
+  ret i32 %conv
+}
+
+define i64 @i16_i64_oor(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: i16_i64_oor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    sxth x0, w8
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 8 x i16> %a, i32 8
+  %conv = sext i16 %elt to i64
+  ret i64 %conv
+}
+
+define i64 @i32_i64_oor(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: i32_i64_oor:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, z0.s[4]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    ret
+entry:
+  %elt = extractelement <vscale x 4 x i32> %a, i32 4
+  %conv = sext i32 %elt to i64
+  ret i64 %conv
+}
+
+attributes #0 = { "target-features"="+sve" }