From 35cf9a1fc5d2ab7c171c968737092874676ce8ea Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Mon, 4 Nov 2019 14:46:42 -0800
Subject: [PATCH] [AArch64] Re-add patterns for (s/u)mull2.

These patterns were added in D46009, but removed in D54276 due to
missing test coverage.

Differential Revision: https://reviews.llvm.org/D69831
---
NOTE(review): this copy was rebuilt from a whitespace-collapsed extract in
which every "<Identifier ...>" span had been stripped. The TableGen template
parameter and argument lists below were restored so that the hunks again
match their declared line counts (+19 and +60). The author address on the
From: line and the exact whitespace of the context lines could not be
recovered -- compare against the upstream commit before applying.

 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 19 +++++++++
 llvm/test/CodeGen/AArch64/aarch64-smull.ll  | 60 +++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 77bee60..7ae5b5d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4441,6 +4441,25 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
 defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
       UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
 
+// Patterns for smull2/umull2.
+multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
+    Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+  def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
+                           (extract_high_v16i8 V128:$Rm))),
+             (INST8B V128:$Rn, V128:$Rm)>;
+  def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
+                           (extract_high_v8i16 V128:$Rm))),
+             (INST4H V128:$Rn, V128:$Rm)>;
+  def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
+                           (extract_high_v4i32 V128:$Rm))),
+             (INST2S V128:$Rn, V128:$Rm)>;
+}
+
+defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
+    SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
+defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
+    UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
+
 // Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
 multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
     Instruction INST8B, Instruction INST4H, Instruction INST2S> {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 8922ae9..5828106 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -327,6 +327,66 @@ entry:
   ret void
 }
 
+define <16 x i16> @umull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: umull2_i8:
+; CHECK-DAG: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-DAG: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %arg1_ext = zext <16 x i8> %arg1 to <16 x i16>
+  %arg2_ext = zext <16 x i8> %arg2 to <16 x i16>
+  %mul = mul <16 x i16> %arg1_ext, %arg2_ext
+  ret <16 x i16> %mul
+}
+
+define <16 x i16> @smull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: smull2_i8:
+; CHECK-DAG: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-DAG: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %arg1_ext = sext <16 x i8> %arg1 to <16 x i16>
+  %arg2_ext = sext <16 x i8> %arg2 to <16 x i16>
+  %mul = mul <16 x i16> %arg1_ext, %arg2_ext
+  ret <16 x i16> %mul
+}
+
+define <8 x i32> @umull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: umull2_i16:
+; CHECK-DAG: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-DAG: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %arg1_ext = zext <8 x i16> %arg1 to <8 x i32>
+  %arg2_ext = zext <8 x i16> %arg2 to <8 x i32>
+  %mul = mul <8 x i32> %arg1_ext, %arg2_ext
+  ret <8 x i32> %mul
+}
+
+define <8 x i32> @smull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: smull2_i16:
+; CHECK-DAG: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-DAG: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %arg1_ext = sext <8 x i16> %arg1 to <8 x i32>
+  %arg2_ext = sext <8 x i16> %arg2 to <8 x i32>
+  %mul = mul <8 x i32> %arg1_ext, %arg2_ext
+  ret <8 x i32> %mul
+}
+
+define <4 x i64> @umull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: umull2_i32:
+; CHECK-DAG: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-DAG: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %arg1_ext = zext <4 x i32> %arg1 to <4 x i64>
+  %arg2_ext = zext <4 x i32> %arg2 to <4 x i64>
+  %mul = mul <4 x i64> %arg1_ext, %arg2_ext
+  ret <4 x i64> %mul
+}
+
+define <4 x i64> @smull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: smull2_i32:
+; CHECK-DAG: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-DAG: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %arg1_ext = sext <4 x i32> %arg1 to <4 x i64>
+  %arg2_ext = sext <4 x i32> %arg2 to <4 x i64>
+  %mul = mul <4 x i64> %arg1_ext, %arg2_ext
+  ret <4 x i64> %mul
+}
+
 declare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly
 
 declare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
-- 
2.7.4