From 6375ca40590b9fea49a9d7d20f80d3da6af381f1 Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Wed, 22 Sep 2021 14:25:14 +0100
Subject: [PATCH] [AArch64][SVE] Add extract_subvector patterns for unpacked fp16 and bfloat types.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D110163
---
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td  | 19 ++++++
 llvm/test/CodeGen/AArch64/sve-extract-vector.ll | 85 ++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7455b52..0911ecd 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1332,6 +1332,25 @@ let Predicates = [HasSVEorStreamingSVE] in {
   def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
             (UUNPKHI_ZZ_S ZPR:$Zs)>;
 
+  // Extract nxv2(b)f16 from nxv8(b)f16: unpack .h -> .s, then .s -> .d.
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+            (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))),
+            (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+            (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))),
+            (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
+            (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),
+            (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
+            (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
+            (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+
   // Concatenate two predicates.
   def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
             (UZP1_PPP_S $p1, $p2)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index 45ccc89..a09ac04 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK
 
 ; Should codegen to a nop, since idx is zero.
 define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
@@ -631,6 +631,86 @@ define <vscale x 2 x i8> @extract_nxv2i8_nxv14i8_12(<vscale x 14 x i8> %vec) {
   ret <vscale x 2 x i8> %res
 }
 
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_0(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 0)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_2(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 2)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_4(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 4)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_6(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 6)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_0(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 0)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_2(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 2)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_4(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 4)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_6(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 6)
+  ret <vscale x 2 x bfloat> %res
+}
+
 attributes #0 = { vscale_range(2,2) }
 
 declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
@@ -649,3 +729,6 @@ declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vsca
 
 declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> , i64)
 declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv14i8(<vscale x 14 x i8> , i64)
+
+declare <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half>, i64)
+declare <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat>, i64)
-- 
2.7.4