From 9f2014269ad8e4f5dc4567ca12919c6135e62f16 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 12 Feb 2023 16:52:38 +0000 Subject: [PATCH] [AArch64] Regenerate and extend zip1 tests. NFC This cleans up the existing tests and adds some extra cases that can be lowered to zip instructions. --- llvm/test/CodeGen/AArch64/arm64-zip.ll | 373 ++++++++++++++++++++++++--------- 1 file changed, 273 insertions(+), 100 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index 14772e7..6d5fe17 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -1,107 +1,280 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipi8: -;CHECK: zip1.8b -;CHECK: zip2.8b -;CHECK-NEXT: add.8b - %tmp1 = load <8 x i8>, ptr %A - %tmp2 = load <8 x i8>, ptr %B - %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp5 = add <8 x i8> %tmp3, %tmp4 - ret <8 x i8> %tmp5 -} - -define <4 x i16> @vzipi16(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipi16: -;CHECK: zip1.4h -;CHECK: zip2.4h -;CHECK-NEXT: add.4h - %tmp1 = load <4 x i16>, ptr %A - %tmp2 = load <4 x i16>, ptr %B - %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> - %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> - %tmp5 = add <4 x i16> %tmp3, %tmp4 - ret <4 x i16> %tmp5 -} - -define <16 x i8> @vzipQi8(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipQi8: -;CHECK: zip1.16b -;CHECK: zip2.16b -;CHECK-NEXT: add.16b - %tmp1 = load <16 x i8>, ptr %A - %tmp2 = load <16 x i8>, ptr %B - %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> - %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> - %tmp5 = add <16 x i8> %tmp3, %tmp4 - ret <16 x i8> %tmp5 -} - -define <8 x i16> @vzipQi16(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipQi16: -;CHECK: zip1.8h -;CHECK: zip2.8h -;CHECK-NEXT: add.8h - %tmp1 = load <8 x i16>, ptr %A - %tmp2 = load <8 x i16>, ptr %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> - %tmp5 = add <8 x i16> %tmp3, %tmp4 - ret <8 x i16> %tmp5 -} - -define <4 x i32> @vzipQi32(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipQi32: -;CHECK: zip1.4s -;CHECK: zip2.4s -;CHECK-NEXT: add.4s - %tmp1 = load <4 x i32>, ptr %A - %tmp2 = load <4 x i32>, ptr %B - %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> - %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> - %tmp5 = add <4 x i32> %tmp3, %tmp4 - ret <4 x i32> %tmp5 -} - -define <4 x float> @vzipQf(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipQf: -;CHECK: zip1.4s -;CHECK: zip2.4s -;CHECK-NEXT: fadd.4s - %tmp1 = load <4 x float>, ptr %A - %tmp2 = load <4 x float>, ptr %B - %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> - %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> - %tmp5 = fadd <4 x float> %tmp3, %tmp4 - ret <4 x float> %tmp5 +define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vzipi8: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: zip1.8b v2, v0, v1 +; CHECK-NEXT: zip2.8b v0, v0, v1 +; CHECK-NEXT: add.8b v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +; CHECK-LABEL: vzipi16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: zip1.4h v2, v0, v1 +; CHECK-NEXT: zip2.4h v0, v0, v1 +; CHECK-NEXT: add.4h v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +; CHECK-LABEL: vzipQi8: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: zip1.16b v2, v0, v1 +; CHECK-NEXT: zip2.16b v0, v0, v1 +; CHECK-NEXT: add.16b v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +; CHECK-LABEL: vzipQi16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: zip1.8h v2, v0, v1 +; CHECK-NEXT: zip2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> + %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +; CHECK-LABEL: vzipQi32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: zip1.4s v2, v0, v1 +; CHECK-NEXT: zip2.4s v0, v0, v1 +; CHECK-NEXT: add.4s v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> + %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { +; CHECK-LABEL: vzipQf: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: zip1.4s v2, v0, v1 +; CHECK-NEXT: zip2.4s v0, v0, v1 +; CHECK-NEXT: fadd.4s v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> + %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> + %tmp5 = fadd <4 x float> %tmp3, %tmp4 + ret <4 x float> %tmp5 } ; Undef shuffle indices should not prevent matching to VZIP: -define <8 x i8> @vzipi8_undef(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipi8_undef: -;CHECK: zip1.8b -;CHECK: zip2.8b -;CHECK-NEXT: add.8b - %tmp1 = load <8 x i8>, ptr %A - %tmp2 = load <8 x i8>, ptr %B - %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp5 = add <8 x i8> %tmp3, %tmp4 - ret <8 x i8> %tmp5 -} - -define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vzipQi8_undef: -;CHECK: zip1.16b -;CHECK: zip2.16b -;CHECK-NEXT: add.16b - %tmp1 = load <16 x i8>, ptr %A - %tmp2 = load <16 x i8>, ptr %B - %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> - %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> - %tmp5 = add <16 x i8> %tmp3, %tmp4 - ret <16 x i8> %tmp5 +define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +; CHECK-LABEL: vzipi8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: zip1.8b v2, v0, v1 +; CHECK-NEXT: zip2.8b v0, v0, v1 +; CHECK-NEXT: add.8b v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +; CHECK-LABEL: vzipQi8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: zip1.16b v2, v0, v1 +; CHECK-NEXT: zip2.16b v0, v0, v1 +; CHECK-NEXT: add.16b v0, v2, v0 +; CHECK-NEXT: ret + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) { +; CHECK-LABEL: combine_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.8b v2, v0, v1 +; CHECK-NEXT: zip1.8b v0, v0, v1 +; CHECK-NEXT: mov.d v0[1], v2[0] +; CHECK-NEXT: ret + %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + ret <16 x i8> %3 +} + +define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) { +; CHECK-LABEL: combine2_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1.8b v2, v0, v1 +; CHECK-NEXT: zip2.8b v0, v0, v1 +; CHECK-NEXT: mov.d v2[1], v0[0] +; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: ret + %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> + %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> + %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> + ret <16 x i8> %5 +} + +define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) { +; CHECK-LABEL: combine_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.4h v2, v0, v1 +; CHECK-NEXT: zip1.4h v0, v0, v1 +; CHECK-NEXT: mov.d v0[1], v2[0] +; CHECK-NEXT: ret + %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> + ret <8 x i16> %3 +} + +define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) { +; CHECK-LABEL: combine2_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1.4h v2, v0, v1 +; CHECK-NEXT: zip2.4h v0, v0, v1 +; CHECK-NEXT: mov.d v2[1], v0[0] +; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: ret + %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> + %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> + %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> + ret <8 x i16> %5 +} + +define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) { +; CHECK-LABEL: combine_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.2s v2, v0, v1 +; CHECK-NEXT: zip1.2s v0, v0, v1 +; CHECK-NEXT: mov.d v0[1], v2[0] +; CHECK-NEXT: ret + %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) { +; CHECK-LABEL: combine2_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1.2s v2, v0, v1 +; CHECK-NEXT: zip2.2s v0, v0, v1 +; CHECK-NEXT: mov.d v2[1], v0[0] +; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: ret + %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> + %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> + %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> + ret <4 x i32> %5 +} + +define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) { +; CHECK-LABEL: combine_v16i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.8b v2, v0, v1 +; CHECK-NEXT: zip1.8b v0, v0, v1 +; CHECK-NEXT: mov.d v0[1], v2[0] +; CHECK-NEXT: ret + %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + ret <16 x i8> %3 +} + +define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) { +; CHECK-LABEL: combine2_v16i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1.8b v2, v0, v1 +; CHECK-NEXT: zip2.8b v0, v0, v1 +; CHECK-NEXT: mov.d v2[1], v0[0] +; CHECK-NEXT: mov.16b v0, v2 +; CHECK-NEXT: ret + %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> + %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> + %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> + ret <16 x i8> %5 +} + +define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) { +; CHECK-LABEL: combine_v8i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.4h v2, v0, v1 +; CHECK-NEXT: zip1.4h v0, v0, v1 +; CHECK-NEXT: mov.d v0[1], v2[0] +; CHECK-NEXT: ret + %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> + ret <8 x i16> %3 +} + +; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled +define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { +; CHECK-LABEL: combine_v8i16_8first: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-NEXT: fmov d2, d0 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3 +; CHECK-NEXT: ret + %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> + ret <16 x i8> %3 +} + + +; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled +define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { +; CHECK-LABEL: combine_v8i16_8firstundef: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-NEXT: fmov d2, d0 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3 +; CHECK-NEXT: ret + %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> + ret <16 x i8> %3 } -- 2.7.4