From: Andrew Savonichev
Date: Thu, 29 Apr 2021 16:34:39 +0000 (+0300)
Subject: [AArch64] Fix scalar imm variants of SIMD shift left instructions
X-Git-Tag: llvmorg-14-init~7563
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1ee50b473168735752a2f80ae9b356cfa70a76d0;p=platform%2Fupstream%2Fllvm.git

[AArch64] Fix scalar imm variants of SIMD shift left instructions

This issue was reported in PR50057: Cannot select: t10: i64 =
AArch64ISD::VSHL t2, Constant:i32<2>

Shift intrinsics (llvm.aarch64.neon.ushl.i64 and sshl) with a constant
shift operand are lowered into AArch64ISD::VSHL in tryCombineShiftImm.
VSHL has i64 and v1i64 patterns for a right shift, but only v1i64 for
a left shift.

This patch adds the missing i64 pattern for AArch64ISD::VSHL, and LIT
tests to cover scalar variants (i64 and v1i64) of all shift
intrinsics (only ushl and sshl cases fail without the patch, others
were just not covered).

Differential Revision: https://reviews.llvm.org/D101580
---

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3f00e8c..f6d26083 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8958,10 +8958,13 @@ multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm,
                              SDPatternOperator OpNode> {
   def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
                               FPR64, FPR64, vecshiftL64, asm,
-                  [(set (v1i64 FPR64:$Rd),
-                     (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+                  [(set (i64 FPR64:$Rd),
+                     (OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
     let Inst{21-16} = imm{5-0};
   }
+
+  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))),
+            (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
 }
 
 let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 8c78bf6..c63f339 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -27,6 +27,40 @@ define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   ret <2 x i32> %tmp3
 }
 
+define <1 x
i64> @sqshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: sqshl1d: +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sqshl1d_constant(<1 x i64>* %A) nounwind { +;CHECK-LABEL: sqshl1d_constant: +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @sqshl_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: sqshl_scalar: +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load i64, i64* %A + %tmp2 = load i64, i64* %B + %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @sqshl_scalar_constant(i64* %A) nounwind { +;CHECK-LABEL: sqshl_scalar_constant: +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: uqshl8b: ;CHECK: uqshl.8b @@ -126,15 +160,52 @@ define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @uqshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: uqshl1d: +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @uqshl1d_constant(<1 x i64>* %A) nounwind { +;CHECK-LABEL: uqshl1d_constant: +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 
+} + +define i64 @uqshl_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: uqshl_scalar: +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load i64, i64* %A + %tmp2 = load i64, i64* %B + %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @uqshl_scalar_constant(i64* %A) nounwind { +;CHECK-LABEL: uqshl_scalar_constant: +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone + declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone @@ -173,6 +244,44 @@ define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ret <2 x i32> %tmp3 } +define <1 x i64> @srshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: srshl1d: +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 
x i64> %tmp3 +} + +define <1 x i64> @srshl1d_constant(<1 x i64>* %A) nounwind { +;CHECK-LABEL: srshl1d_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @srshl_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: srshl_scalar: +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load i64, i64* %A + %tmp2 = load i64, i64* %B + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @srshl_scalar_constant(i64* %A) nounwind { +;CHECK-LABEL: srshl_scalar_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: urshl8b: ;CHECK: urshl.8b @@ -200,6 +309,44 @@ define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ret <2 x i32> %tmp3 } +define <1 x i64> @urshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: urshl1d: +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @urshl1d_constant(<1 x i64>* %A) nounwind { +;CHECK-LABEL: urshl1d_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @urshl_scalar(i64* %A, 
i64* %B) nounwind { +;CHECK-LABEL: urshl_scalar: +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load i64, i64* %A + %tmp2 = load i64, i64* %B + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @urshl_scalar_constant(i64* %A) nounwind { +;CHECK-LABEL: urshl_scalar_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: srshl16b: ;CHECK: srshl.16b @@ -276,11 +423,13 @@ declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind rea declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone @@ -382,6 +531,44 @@ define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @sqrshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: sqrshl1d: +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 
+ %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sqrshl1d_constant(<1 x i64>* %A) nounwind { +;CHECK-LABEL: sqrshl1d_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @sqrshl_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: sqrshl_scalar: +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load i64, i64* %A + %tmp2 = load i64, i64* %B + %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @sqrshl_scalar_constant(i64* %A) nounwind { +;CHECK-LABEL: sqrshl_scalar_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: uqrshl16b: ;CHECK: uqrshl.16b @@ -418,15 +605,55 @@ define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @uqrshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: uqrshl1d: +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @uqrshl1d_constant(<1 x i64>* %A) nounwind { +;CHECK-LABEL: uqrshl1d_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: uqrshl {{d[0-9]+}}, 
{{d[0-9]+}}, d[[DCONST]] + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @uqrshl_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: uqrshl_scalar: +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %tmp1 = load i64, i64* %A + %tmp2 = load i64, i64* %B + %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @uqrshl_scalar_constant(i64* %A) nounwind { +;CHECK-LABEL: uqrshl_scalar_constant: +;CHECK: mov w[[GCONST:[0-9]+]], #1 +;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]] +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]] + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone @@ -494,6 +721,22 @@ define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @urshr1d(<1 x i64>* %A) nounwind { 
+;CHECK-LABEL: urshr1d:
+;CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load <1 x i64>, <1 x i64>* %A
+  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
+  ret <1 x i64> %tmp3
+}
+
+define i64 @urshr_scalar(i64* %A) nounwind {
+;CHECK-LABEL: urshr_scalar:
+;CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load i64, i64* %A
+  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
+  ret i64 %tmp3
+}
+
 define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: srshr8b:
 ;CHECK: srshr.8b
@@ -550,6 +793,22 @@ define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
   ret <2 x i64> %tmp3
 }
 
+define <1 x i64> @srshr1d(<1 x i64>* %A) nounwind {
+;CHECK-LABEL: srshr1d:
+;CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load <1 x i64>, <1 x i64>* %A
+  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
+  ret <1 x i64> %tmp3
+}
+
+define i64 @srshr_scalar(i64* %A) nounwind {
+;CHECK-LABEL: srshr_scalar:
+;CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load i64, i64* %A
+  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
+  ret i64 %tmp3
+}
+
 define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqshlu8b:
 ;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
@@ -606,10 +865,27 @@ define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
   ret <2 x i64> %tmp3
 }
 
+define <1 x i64> @sqshlu1d_constant(<1 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshlu1d_constant:
+;CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load <1 x i64>, <1 x i64>* %A
+  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
+  ret <1 x i64> %tmp3
+}
+
+define i64 @sqshlu_scalar_constant(i64* %A) nounwind {
+;CHECK-LABEL: sqshlu_scalar_constant:
+;CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load i64, i64* %A
+  %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
+  ret i64 %tmp3
+}
+
 declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone @@ -1196,6 +1472,8 @@ declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>) declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>) +declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>) +declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64) define <8 x i16> @neon.ushll8h_constant_shift(<8 x i8>* %A) nounwind { ;CHECK-LABEL: neon.ushll8h_constant_shift @@ -1282,6 +1560,24 @@ define <2 x i64> @neon.ushll2d_constant_shift(<2 x i32>* %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @neon.ushl_vscalar_constant_shift(<1 x i32>* %A) nounwind { +;CHECK-LABEL: neon.ushl_vscalar_constant_shift +;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load <1 x i32>, <1 x i32>* %A + %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> + %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @neon.ushl_scalar_constant_shift(i32* %A) nounwind { +;CHECK-LABEL: neon.ushl_scalar_constant_shift +;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load i32, i32* %A + %tmp2 = zext i32 %tmp1 to i64 + %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1) + ret i64 %tmp3 +} + define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sshll8h: ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1 @@ -1304,6 +1600,8 @@ declare <16 x i8> 
@llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>) declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>) +declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>) +declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64) define <16 x i8> @neon.sshl16b_constant_shift(<16 x i8>* %A) nounwind { ;CHECK-LABEL: neon.sshl16b_constant_shift @@ -1400,6 +1698,24 @@ define <2 x i64> @neon.sshll2d_constant_shift(<2 x i32>* %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @neon.sshll_vscalar_constant_shift(<1 x i32>* %A) nounwind { +;CHECK-LABEL: neon.sshll_vscalar_constant_shift +;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load <1 x i32>, <1 x i32>* %A + %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @neon.sshll_scalar_constant_shift(i32* %A) nounwind { +;CHECK-LABEL: neon.sshll_scalar_constant_shift +;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load i32, i32* %A + %tmp2 = zext i32 %tmp1 to i64 + %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1) + ret i64 %tmp3 +} + ; FIXME: should be constant folded. 
define <2 x i64> @neon.sshl2d_constant_fold() nounwind { ;CHECK-LABEL: neon.sshl2d_constant_fold @@ -1637,6 +1953,26 @@ define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ret <2 x i64> %tmp5 } +define <1 x i64> @ursra1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: ursra1d: +;CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + %tmp4 = load <1 x i64>, <1 x i64>* %B + %tmp5 = add <1 x i64> %tmp3, %tmp4 + ret <1 x i64> %tmp5 +} + +define i64 @ursra_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: ursra_scalar: +;CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1) + %tmp4 = load i64, i64* %B + %tmp5 = add i64 %tmp3, %tmp4 + ret i64 %tmp5 +} + define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srsra8b: ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1 @@ -1707,6 +2043,26 @@ define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ret <2 x i64> %tmp5 } +define <1 x i64> @srsra1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: srsra1d: +;CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + %tmp4 = load <1 x i64>, <1 x i64>* %B + %tmp5 = add <1 x i64> %tmp3, %tmp4 + ret <1 x i64> %tmp5 +} + +define i64 @srsra_scalar(i64* %A, i64* %B) nounwind { +;CHECK-LABEL: srsra_scalar: +;CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #1 + %tmp1 = load i64, i64* %A + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1) + %tmp4 = load i64, i64* %B + %tmp5 = add i64 %tmp3, %tmp4 + ret i64 %tmp5 +} + define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: usra8b: ;CHECK: usra.8b v0, {{v[0-9]+}}, #1 @@ -1777,6 +2133,16 @@ define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ret <2 x 
i64> %tmp5
 }
 
+define <1 x i64> @usra1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: usra1d:
+;CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #1
+  %tmp1 = load <1 x i64>, <1 x i64>* %A
+  %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
+  %tmp4 = load <1 x i64>, <1 x i64>* %B
+  %tmp5 = add <1 x i64> %tmp3, %tmp4
+  ret <1 x i64> %tmp5
+}
+
 define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssra8b:
 ;CHECK: ssra.8b v0, {{v[0-9]+}}, #1