class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
-// Helper fragment for an extract of the high portion of a 128-bit vector.
+// Helper fragment for an extract of the high portion of a 128-bit vector. The
+// ComplexPattern matches both extract_subvector and bitcast(extract_subvector(..)).
def extract_high_v16i8 :
- UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
+ ComplexPattern<v8i8, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v8i16 :
- UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
+ ComplexPattern<v4i16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4i32 :
- UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
-def extract_high_v2i64 :
- UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
+ ComplexPattern<v2i32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+
+def extract_high_dup_v8i16 :
+ BinOpFrag<(extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 4))>;
+def extract_high_dup_v4i32 :
+ BinOpFrag<(extract_subvector (v4i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS)), (i64 2))>;
//===----------------------------------------------------------------------===//
// Asm Operand Classes.
asm#"2", ".1q", ".2d", ".2d", []>;
}
- def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
- (v8i8 (extract_high_v16i8 V128:$Rm)))),
+ def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
+ (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))),
(!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
}
def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))))]>;
+ (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
+ (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
+ (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))))]>;
}
multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$dst),
(add (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))))))]>;
+ (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm)))))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(add (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))))))]>;
+ (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm)))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(add (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))))))]>;
+ (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm)))))))]>;
}
multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
V128, V128, V128,
asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$dst),
(OpNode (v8i16 V128:$Rd),
- (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
+ (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),
- (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
+ (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))))]>;
}
multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".8h", ".8h", ".16b",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V128, V64,
asm, ".4s", ".4s", ".4h",
V128, V128, V128,
asm#"2", ".4s", ".4s", ".8h",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V128, V64,
asm, ".2d", ".2d", ".2s",
V128, V128, V128,
asm#"2", ".2d", ".2d", ".4s",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
//----------------------------------------------------------------------------
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),
(v4i32 (int_aarch64_neon_sqdmull
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16
- (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),
(v2i64 (int_aarch64_neon_sqdmull
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32
- (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
V128, V128, vecshiftL8,
asm#"2", ".8h", ".16b",
[(set (v8i16 V128:$Rd),
- (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
+ (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)), vecshiftL8:$imm))]> {
bits<3> imm;
let Inst{18-16} = imm;
}
V128, V128, vecshiftL16,
asm#"2", ".4s", ".8h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), vecshiftL16:$imm))]> {
bits<4> imm;
let Inst{19-16} = imm;
V128, V128, vecshiftL32,
asm#"2", ".2d", ".4s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), vecshiftL32:$imm))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
define <4 x i32> @test_smull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcasta1:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcasta1:
define <4 x i32> @test_smull_high_s16_bitcastb1(<8 x i16> %a, <16 x i8> %bb) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcastb1:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcastb1:
define <4 x i32> @test_smull_high_s16_bitcasta2(<2 x i64> %a, <8 x i16> %b) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcasta2:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcasta2:
define <4 x i32> @test_smull_high_s16_bitcastb2(<8 x i16> %a, <16 x i8> %b) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcastb2:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcastb2:
define <4 x i32> @test_umull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 {
; CHECK-LE-LABEL: test_umull_high_s16_bitcasta1:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: umull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_umull_high_s16_bitcasta1:
define <8 x i16> @test_vabdl_high_u82(<16 x i8> %a, <8 x i16> %bb) {
; CHECK-LE-LABEL: test_vabdl_high_u82:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: uabdl v0.8h, v0.8b, v1.8b
+; CHECK-LE-NEXT: uabdl2 v0.8h, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_vabdl_high_u82:
define <8 x i16> @test_vabdl_high_s82(<16 x i8> %a, <8 x i16> %bb) {
; CHECK-LE-LABEL: test_vabdl_high_s82:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: sabdl v0.8h, v0.8b, v1.8b
+; CHECK-LE-NEXT: sabdl2 v0.8h, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_vabdl_high_s82:
define <4 x i32> @test_vqdmlal_high_s16_bitcast(<4 x i32> %a, <8 x i16> %b, <16 x i8> %cc) {
; CHECK-LE-LABEL: test_vqdmlal_high_s16_bitcast:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-LE-NEXT: sqdmlal v0.4s, v1.4h, v2.4h
+; CHECK-LE-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_vqdmlal_high_s16_bitcast:
define <8 x i16> @test_pmull_high_p8_64(<2 x i64> %aa, <2 x i64> %bb) {
; CHECK-LE-LABEL: test_pmull_high_p8_64:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-LE-NEXT: pmull2 v0.8h, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_pmull_high_p8_64:
define <2 x i64> @hadd32_zext_asr(<16 x i8> %src1a) {
; CHECK-LE-LABEL: hadd32_zext_asr:
; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ushll v0.2d, v0.2s, #1
+; CHECK-LE-NEXT: ushll2 v0.2d, v0.4s, #1
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: hadd32_zext_asr: