[AArch64][SVE] Add extract_subvector patterns for unpacked fp16 and bfloat types.

author Sander de Smalen <sander.desmalen@arm.com>

Wed, 22 Sep 2021 13:25:14 +0000 (14:25 +0100)

committer Sander de Smalen <sander.desmalen@arm.com>

Wed, 22 Sep 2021 13:25:17 +0000 (14:25 +0100)
author Sander de Smalen <sander.desmalen@arm.com>
Wed, 22 Sep 2021 13:25:14 +0000 (14:25 +0100)
committer Sander de Smalen <sander.desmalen@arm.com>
Wed, 22 Sep 2021 13:25:17 +0000 (14:25 +0100)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

index 7455b52..0911ecd 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1332,6 +1332,24 @@ let Predicates = [HasSVEorStreamingSVE] in {
    def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
              (UUNPKHI_ZZ_S ZPR:$Zs)>;
  
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+            (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; // lanes 0-3, then 0-1 (x vscale)
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))),
+            (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; // lanes 0-3, then 2-3 (x vscale)
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+            (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; // lanes 4-7, then 4-5 (x vscale)
+  def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))),
+            (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; // lanes 4-7, then 6-7 (x vscale)
+
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
+            (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; // lanes 0-3, then 0-1 (x vscale)
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),
+            (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; // lanes 0-3, then 2-3 (x vscale)
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
+            (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; // lanes 4-7, then 4-5 (x vscale)
+  def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
+            (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; // lanes 4-7, then 6-7 (x vscale)
+
    // Concatenate two predicates.
    def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
              (UZP1_PPP_S $p1, $p2)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll

index 45ccc89..a09ac04 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -1,5 +1,5 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK
  
  ; Should codegen to a nop, since idx is zero.
  define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
@@ -631,6 +631,86 @@ define <vscale x 2 x i8> @extract_nxv2i8_nxv14i8_12(<vscale x 14 x i8> %vec) {
    ret <vscale x 2 x i8> %res
  }
  
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_0(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 0)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_2(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 2)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_4(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 4)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @extract_nxv2f16_nxv8f16_6(<vscale x 8 x half> %in) {
+; CHECK-LABEL: extract_nxv2f16_nxv8f16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half> %in, i64 6)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_0(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 0)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_2(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 2)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_4(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 4)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv8bf16_6(<vscale x 8 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat> %in, i64 6)
+  ret <vscale x 2 x bfloat> %res
+}
+
  attributes #0 = { vscale_range(2,2) }
  
  declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
@@ -649,3 +729,6 @@ declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vsca
  
  declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> , i64)
  declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv14i8(<vscale x 14 x i8> , i64)
+
+declare <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv8f16(<vscale x 8 x half>, i64)
+declare <vscale x 2 x bfloat> @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16(<vscale x 8 x bfloat>, i64)
author	Sander de Smalen <sander.desmalen@arm.com>
	Wed, 22 Sep 2021 13:25:14 +0000 (14:25 +0100)
committer	Sander de Smalen <sander.desmalen@arm.com>
	Wed, 22 Sep 2021 13:25:17 +0000 (14:25 +0100)
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/AArch64/sve-extract-vector.ll		patch \| blob \| history