From 87bcbd61b5e60ed1f465cc276029be9f667162ba Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 28 Sep 2021 19:44:10 +0100 Subject: [PATCH] [AArch64][SVE] Fix extract_subvector patterns for unpacked fp types. The patterns added in D110163 were incorrect, since they used the wrong element widths for their shuffles. Example for nxv2f16 extract_subvector(nxv8f16 %in, 6): <a, b, c, d, e, f, g, h> ^^^ extract g and h (the last two elements). => UUNPKHI .h -> .s results in: <e, 0, f, 0, g, 0, h, 0> => UUNPKHI .s -> .d results in: <g, 0, 0, 0, h, 0, 0, 0> Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D110523 --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 16 ++++---- .../CodeGen/AArch64/sve-extract-scalable-vector.ll | 44 +++++++++++----------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 0911ecd..6199d4e 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1333,22 +1333,22 @@ let Predicates = [HasSVEorStreamingSVE] in { (UUNPKHI_ZZ_S ZPR:$Zs)>; def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>; + (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>; + (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), - (UUNPKLO_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>; + (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))), - (UUNPKHI_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>; + (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>; + (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_S (UUNPKLO_ZZ_D ZPR:$Zs))>; + (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; def : 
Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))), - (UUNPKLO_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>; + (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))), - (UUNPKHI_ZZ_S (UUNPKHI_ZZ_D ZPR:$Zs))>; + (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; // Concatenate two predicates. def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll index 5f59364..4367db6 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -502,8 +502,8 @@ declare @llvm.experimental.vector.extract.nxv4i8.nxv16i8( @extract_nxv2f16_nxv6f16_0( %in) { ; CHECK-LABEL: extract_nxv2f16_nxv6f16_0: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv6f16( %in, i64 0) ret %res @@ -512,8 +512,8 @@ define @extract_nxv2f16_nxv6f16_0( %in) { define @extract_nxv2f16_nxv6f16_2( %in) { ; CHECK-LABEL: extract_nxv2f16_nxv6f16_2: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv6f16( %in, i64 2) ret %res @@ -522,8 +522,8 @@ define @extract_nxv2f16_nxv6f16_2( %in) { define @extract_nxv2f16_nxv6f16_4( %in) { ; CHECK-LABEL: extract_nxv2f16_nxv6f16_4: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv6f16( %in, i64 4) ret %res @@ -601,8 +601,8 @@ declare @llvm.experimental.vector.extract.nxv4f16.nxv16f16(< define @extract_nxv2f16_nxv8f16_0( %in) { ; CHECK-LABEL: 
extract_nxv2f16_nxv8f16_0: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv8f16( %in, i64 0) ret %res @@ -611,8 +611,8 @@ define @extract_nxv2f16_nxv8f16_0( %in) { define @extract_nxv2f16_nxv8f16_2( %in) { ; CHECK-LABEL: extract_nxv2f16_nxv8f16_2: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv8f16( %in, i64 2) ret %res @@ -621,8 +621,8 @@ define @extract_nxv2f16_nxv8f16_2( %in) { define @extract_nxv2f16_nxv8f16_4( %in) { ; CHECK-LABEL: extract_nxv2f16_nxv8f16_4: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv8f16( %in, i64 4) ret %res @@ -631,8 +631,8 @@ define @extract_nxv2f16_nxv8f16_4( %in) { define @extract_nxv2f16_nxv8f16_6( %in) { ; CHECK-LABEL: extract_nxv2f16_nxv8f16_6: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2f16.nxv8f16( %in, i64 6) ret %res @@ -669,8 +669,8 @@ declare @llvm.experimental.vector.extract.nxv4bf16.nxv8bf1 define @extract_nxv2bf16_nxv6bf16_0( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv6bf16_0: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv6bf16( %in, i64 0) ret %res @@ -679,8 +679,8 @@ define @extract_nxv2bf16_nxv6bf16_0( define @extract_nxv2bf16_nxv6bf16_2( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv6bf16_2: ; CHECK: // %bb.0: -; CHECK-NEXT: 
uunpklo z0.d, z0.s -; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv6bf16( %in, i64 2) ret %res @@ -689,8 +689,8 @@ define @extract_nxv2bf16_nxv6bf16_2( define @extract_nxv2bf16_nxv6bf16_4( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv6bf16_4: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv6bf16( %in, i64 4) ret %res @@ -704,8 +704,8 @@ declare @llvm.experimental.vector.extract.nxv2bf16.nxv6bf1 define @extract_nxv2bf16_nxv8bf16_0( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_0: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16( %in, i64 0) ret %res @@ -714,8 +714,8 @@ define @extract_nxv2bf16_nxv8bf16_0( define @extract_nxv2bf16_nxv8bf16_2( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_2: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16( %in, i64 2) ret %res @@ -724,8 +724,8 @@ define @extract_nxv2bf16_nxv8bf16_2( define @extract_nxv2bf16_nxv8bf16_4( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_4: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16( %in, i64 4) ret %res @@ -734,8 +734,8 @@ define @extract_nxv2bf16_nxv8bf16_4( define @extract_nxv2bf16_nxv8bf16_6( %in) { ; CHECK-LABEL: extract_nxv2bf16_nxv8bf16_6: ; CHECK: // %bb.0: -; 
CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.extract.nxv2bf16.nxv8bf16( %in, i64 6) ret %res -- 2.7.4