From 4585bff408bca58a883b9593f578652cff0f43c9 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 13 Apr 2022 18:10:49 +0100 Subject: [PATCH] [AArch64] Add new shuffles tests, and regenerate aarch64-wide-shuffle.ll and neon-wide-splat.ll. NFC --- llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll | 11 ++- llvm/test/CodeGen/AArch64/neon-wide-splat.ll | 55 +++++++---- llvm/test/CodeGen/AArch64/shuffles.ll | 112 ++++++++++++++++++++++ 3 files changed, 159 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/shuffles.ll diff --git a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll index d06df7a..64cd050 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll @@ -1,16 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #2 +; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #4 +; CHECK-NEXT: ret entry: ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext ; but only match the last three instructions as the first two could be combined to ; a dup2 at some stage. - ; CHECK: dup - ; CHECK: ext - ; CHECK: ext %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2 %vgetq_lane = trunc i32 %x4 to i16 %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0 diff --git a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll index 6290f85..16386f5 100644 --- a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll +++ b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll @@ -1,8 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <4 x i16> @shuffle1(<4 x i16> %v) { ; CHECK-LABEL: shuffle1: -; CHECK: dup v0.2s, v0.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.2s, v0.s[0] ; CHECK-NEXT: ret entry: %res = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -11,7 +14,9 @@ entry: define <4 x i16> @shuffle2(<4 x i16> %v) { ; CHECK-LABEL: shuffle2: -; CHECK: dup v0.2s, v0.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.2s, v0.s[1] ; CHECK-NEXT: ret entry: %res = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -20,7 +25,8 @@ entry: define <8 x i16> @shuffle3(<8 x i16> %v) { ; CHECK-LABEL: shuffle3: -; CHECK: dup v0.2d, v0.d[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.2d, v0.d[0] ; CHECK-NEXT: ret entry: %res = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> @shuffle4(<4 x i32> %v) { ; CHECK-LABEL: shuffle4: -; CHECK: dup v0.2d, v0.d[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.2d, v0.d[0] ; CHECK-NEXT: ret entry: %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> @@ -39,7 +46,8 @@ entry: define <16 x i8> @shuffle5(<16 x i8> %v) { ; CHECK-LABEL: shuffle5: -; CHECK: dup v0.4s, v0.s[2] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.4s, v0.s[2] ; CHECK-NEXT: ret entry: %res = shufflevector <16 x i8> %v, <16 x i8> undef, <16 x i32> @shuffle6(<16 x i8> %v) { ; CHECK-LABEL: shuffle6: -; CHECK: dup v0.2d, v0.d[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.2d, v0.d[1] ; CHECK-NEXT: ret entry: %res = shufflevector <16 x i8> %v, <16 x i8> undef, <16 x i32> @shuffle7(<8 x i8> %v) { ; CHECK-LABEL: shuffle7: -; CHECK: dup v0.2s, v0.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.2s, v0.s[1] ; CHECK-NEXT: ret entry: %res = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> @shuffle8(<8 x i8> %v) { ; CHECK-LABEL: shuffle8: -; CHECK: dup v0.4h, v0.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.4h, v0.h[3] ; CHECK-NEXT: ret entry: %res = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> @shuffle_not1(<16 x i8> %v) { ; CHECK-LABEL: shuffle_not1: -; CHECK: ext v0.16b, v0.16b, v0.16b, #2 +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #2 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret %res = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> ret <8 x i8> %res } @@ -92,9 +108,11 @@ define <8 x i8> @shuffle_not1(<16 x i8> %v) { ; Block is not a proper lane define <4 x i32> @shuffle_not2(<4 x i32> %v) { ; CHECK-LABEL: shuffle_not2: -; CHECK-NOT: dup -; CHECK: ext -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #4 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; CHECK-NEXT: ret entry: %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> ret <4 x i32> %res @@ -103,8 +121,8 @@ entry: ; Block size is equal to vector size define <4 x i16> @shuffle_not3(<4 x i16> %v) { ; CHECK-LABEL: shuffle_not3: -; CHECK-NOT: dup -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ret entry: %res = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> ret <4 x i16> %res @@ -113,8 +131,13 @@ entry: ; Blocks mismatch define <8 x i8> @shuffle_not4(<8 x i8> %v) { ; CHECK-LABEL: shuffle_not4: -; CHECK-NOT: dup -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI11_0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov v0.d[1], v0.d[0] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-NEXT: ret entry: %res = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll new file mode 100644 index 0000000..9365344 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shuffles.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s + +define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) { +; CHECK-LABEL: test_shuf1: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v3.16b, v6.16b, v7.16b, #8 +; CHECK-NEXT: ext v5.16b, v6.16b, v4.16b, #12 +; CHECK-NEXT: uzp1 v6.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp2 v4.4s, v2.4s, v4.4s +; CHECK-NEXT: ext v3.16b, v3.16b, v3.16b, #12 +; CHECK-NEXT: ext v5.16b, v7.16b, v5.16b, #8 +; CHECK-NEXT: trn2 v6.4s, v6.4s, v1.4s +; CHECK-NEXT: trn1 v2.4s, v4.4s, v2.4s +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #12 +; CHECK-NEXT: ext v3.16b, v1.16b, v3.16b, #8 +; CHECK-NEXT: rev64 v16.4s, v5.4s +; CHECK-NEXT: dup v7.4s, v7.s[0] +; CHECK-NEXT: ext v1.16b, v0.16b, v6.16b, #12 +; CHECK-NEXT: mov v2.s[3], v7.s[3] +; CHECK-NEXT: ext v0.16b, v3.16b, v4.16b, #8 +; CHECK-NEXT: ext v3.16b, v5.16b, v16.16b, #8 +; CHECK-NEXT: ret + %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> + ret <16 x i32> %s3 +} + +define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) { +; CHECK-LABEL: test_shuf2: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.16b, v6.16b, v7.16b, #8 +; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #12 +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #12 +; CHECK-NEXT: ext v0.16b, v1.16b, v0.16b, #8 +; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8 +; CHECK-NEXT: ret + %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> + ret <4 x i32> %s3 +} + +define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) { +; CHECK-LABEL: test_shuf3: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 v2.4s, v1.4s, v0.4s +; CHECK-NEXT: trn2 v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; CHECK-NEXT: ret + %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> + ret <4 x i32> %s3 +} + +define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) { +; CHECK-LABEL: test_shuf4: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.4s, v2.4s, v4.4s +; CHECK-NEXT: dup v1.4s, v7.s[0] +; CHECK-NEXT: trn1 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: mov v0.s[3], v1.s[3] +; CHECK-NEXT: ret + %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> + ret <4 x i32> %s3 +} + +define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) { +; CHECK-LABEL: test_shuf5: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.16b, v6.16b, v4.16b, #12 +; CHECK-NEXT: ext v0.16b, v7.16b, v0.16b, #8 +; CHECK-NEXT: rev64 v1.4s, v0.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; CHECK-NEXT: ret + %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> + ret <4 x i32> %s3 +} + +define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b) +; CHECK-LABEL: test1503: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v1.16b, #12 +; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: trn1 v1.4s, v0.4s, v1.4s +; CHECK-NEXT: ext v0.16b, v1.16b, v0.16b, #8 +; CHECK-NEXT: ret +{ + %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b) +; CHECK-LABEL: test4366: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2 v2.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp1 v1.4s, v0.4s, v1.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #4 +; CHECK-NEXT: zip2 v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret +{ + %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b) +; CHECK-LABEL: test7367: +; CHECK: // %bb.0: +; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: zip2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #12 +; CHECK-NEXT: ret +{ + %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %r +} -- 2.7.4