From 03065ecd85366d0d0f8502469be7543be6fece70 Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Fri, 8 Oct 2021 14:23:29 +0000 Subject: [PATCH] [AArch64][SVE] Ensure LowerEXTRACT_SUBVECTOR is not called for illegal types The lowering for EXTRACT_SUBVECTOR should not be called during type legalization, only as part of lowering, hence return SDValue() when called on illegal types. This also adds missing tests for extracting fixed types from illegal scalable types. Differential Revision: https://reviews.llvm.org/D111412 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 + .../CodeGen/AArch64/sve-extract-fixed-vector.ll | 238 +++++++++++++++++++++ 2 files changed, 242 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8d8280f..069f30e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10719,6 +10719,10 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); unsigned Size = Op.getValueSizeInBits(); + // If we don't have legal types yet, do nothing + if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT)) + return SDValue(); + if (InVT.isScalableVector()) { // This will be matched by custom code during ISelDAGToDAG. if (Idx == 0 && isPackedVectorType(InVT, DAG)) diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll index e65d29e..ff8be09 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -67,6 +67,43 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind { ret <4 x i32> %retval } +; Should codegen to uzps, since idx is zero and type is illegal. 
+define <4 x i32> @extract_v4i32_nxv2i32(<vscale x 2 x i32> %vec) nounwind #1 { +; CHECK-LABEL: extract_v4i32_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 0) + ret <4 x i32> %retval +} + +; Goes through memory currently; idx != 0. +define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #1 { +; CHECK-LABEL: extract_v4i32_nxv2i32_idx4: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: subs x8, x8, #4 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: cmp x8, #4 +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 4) + ret <4 x i32> %retval +} + ; Should codegen to a nop, since idx is zero. define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind { ; CHECK-LABEL: extract_v8i16_nxv8i16: @@ -100,6 +137,82 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind { ret <8 x i16> %retval } +; Should codegen to uzps, since idx is zero and type is illegal. +define <8 x i16> @extract_v8i16_nxv4i16(<vscale x 4 x i16> %vec) nounwind #1 { +; CHECK-LABEL: extract_v8i16_nxv4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 0) + ret <8 x i16> %retval +} + +; Goes through memory currently; idx != 0. 
+define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #1 { +; CHECK-LABEL: extract_v8i16_nxv4i16_idx8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: subs x8, x8, #8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: cmp x8, #8 +; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 8) + ret <8 x i16> %retval +} + +; Should codegen to uzps, since idx is zero and type is illegal. +define <8 x i16> @extract_v8i16_nxv2i16(<vscale x 2 x i16> %vec) nounwind #1 { +; CHECK-LABEL: extract_v8i16_nxv2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 0) + ret <8 x i16> %retval +} + +; Goes through memory currently; idx != 0. +define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #1 { +; CHECK-LABEL: extract_v8i16_nxv2i16_idx8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: subs x8, x8, #8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: cmp x8, #8 +; CHECK-NEXT: ptrue p0.d, vl8 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 8) + ret <8 x i16> %retval +} + ; Should codegen to a nop, since idx is zero. define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind { ; CHECK-LABEL: extract_v16i8_nxv16i8: @@ -132,6 +245,121 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind ret <16 x i8> %retval } +; Should codegen to uzps, since idx is zero and type is illegal. +define <16 x i8> @extract_v16i8_nxv8i8(<vscale x 8 x i8> %vec) nounwind #1 { +; CHECK-LABEL: extract_v16i8_nxv8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 0) + ret <16 x i8> %retval +} + +; Goes through memory currently; idx != 0. +define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1 { +; CHECK-LABEL: extract_v16i8_nxv8i8_idx16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: cmp x8, #16 +; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1] +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 16) + ret <16 x i8> %retval +} + +; Should codegen to uzps, since idx is zero and type is illegal. +define <16 x i8> @extract_v16i8_nxv4i8(<vscale x 4 x i8> %vec) nounwind #1 { +; CHECK-LABEL: extract_v16i8_nxv4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 0) + ret <16 x i8> %retval +} + +; Goes through memory currently; idx != 0. +define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1 { +; CHECK-LABEL: extract_v16i8_nxv4i8_idx16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: cmp x8, #16 +; CHECK-NEXT: ptrue p0.s, vl16 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 16) + ret <16 x i8> %retval +} + +; Should codegen to uzps, since idx is zero and type is illegal. +define <16 x i8> @extract_v16i8_nxv2i8(<vscale x 2 x i8> %vec) nounwind #1 { +; CHECK-LABEL: extract_v16i8_nxv2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 0) + ret <16 x i8> %retval +} + +; Goes through memory currently; idx != 0. +define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1 { +; CHECK-LABEL: extract_v16i8_nxv2i8_idx16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: cmp x8, #16 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 16) + ret <16 x i8> %retval +} ; Fixed length clamping @@ -181,10 +409,20 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind } attributes #0 = { vscale_range(2,2) } +attributes #1 = { vscale_range(8,8) } declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64) + declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64) +declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32>, i64) + declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16>, i64) +declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16>, i64) +declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16>, i64) + declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8>, i64) +declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8>, i64) +declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8>, i64) +declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64) declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64) -- 2.7.4