From: Paul Walker
Date: Mon, 16 Jan 2023 01:37:50 +0000 (+0000)
Subject: [SVE][CodeGen] Relax all true isel requirement for predicated operations that have...
X-Git-Tag: upstream/17.0.6~18597
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4b051b4248bb6f9971dd1cf87fe311ebe9be917e;p=platform%2Fupstream%2Fllvm.git

[SVE][CodeGen] Relax all true isel requirement for predicated operations that have no side effects.

We have isel patterns that allow predicated operations to emit unpredicated
instructions when the predicate is all true. However, the predicated
operations named #_PRED place no requirement on the results of their inactive
lanes, so when such an operation has no side effects (floating-point
exceptions, for example) we can safely emit an unpredicated instruction
regardless of the predicate. Doing this allows better register allocation and
instruction scheduling, and also enables greater use of instructions that take
immediate operands.

NOTE: This patch does not convert all possible instances but instead focuses
on the cases that are testable once D141937 lands.

Depends on D141937

Differential Revision: https://reviews.llvm.org/D141938
---

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 08c3f47..93df312 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -464,6 +464,7 @@ private:
                           SDValue &Offset, unsigned Scale = 1);

   bool SelectAllActivePredicate(SDValue N);
+  bool SelectAnyPredicate(SDValue N);
 };
 } // end anonymous namespace
@@ -6480,6 +6481,11 @@ bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
   return TLI->isAllActivePredicate(*CurDAG, N);
 }

+bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
+  EVT VT = N.getValueType();
+  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
+}
+
 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                              SDValue &Base, SDValue &Offset,
                                              unsigned Scale) {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 275fdee..079365d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -285,11 +285,11 @@ def AArch64uaba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
 def AArch64usra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
                            [(int_aarch64_sve_usra node:$op1, node:$op2, node:$op3),
-                            (add node:$op1, (AArch64lsr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+                            (add node:$op1, (AArch64lsr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
 def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
                            [(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3),
-                            (add node:$op1, (AArch64asr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+                            (add node:$op1, (AArch64asr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
 def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index cb2a52a..f45ff71 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -280,6 +280,7 @@ def SVEShiftImmR64 : ComplexPattern", []
 def SVEShiftSplatImmR : ComplexPattern;
 def SVEAllActive : ComplexPattern;
+def SVEAnyPredicate : ComplexPattern;
 class SVEExactFPImm : AsmOperandClass { let Name =
"SVEExactFPImmOperand" # Suffix; @@ -434,9 +435,11 @@ class SVE_1_Op_Imm_OptLsl_Pat; -class SVE_1_Op_Imm_Arith_All_Active - : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))), +class SVE_1_Op_Imm_Arith_Any_Predicate + : Pat<(vt (op (pt (SVEAnyPredicate)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))), (inst $Op1, i32:$imm)>; class SVE_1_Op_Imm_Log_Pat; +class SVE_2_Op_Pred_Any_Predicate +: Pat<(vtd (op (pt (SVEAnyPredicate)), vt1:$Op1, vt2:$Op2)), + (inst $Op1, $Op2)>; + class SVE_3_Op_Pat : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), @@ -546,10 +555,10 @@ class SVE_Shift_DupImm_Pred_Pat; -class SVE_Shift_DupImm_All_Active_Pat -: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))), +class SVE_Shift_DupImm_Any_Predicate_Pat +: Pat<(vt (op (pt (SVEAnyPredicate)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))), (inst $Rn, i32:$imm)>; class SVE_2_Op_Fp_Imm_Pat opc, string asm, SDPatternOperator op, def : SVE_2_Op_Pat(NAME # _S)>; def : SVE_2_Op_Pat(NAME # _D)>; - def : SVE_2_Op_Pred_All_Active(NAME # _B)>; - def : SVE_2_Op_Pred_All_Active(NAME # _H)>; - def : SVE_2_Op_Pred_All_Active(NAME # _S)>; - def : SVE_2_Op_Pred_All_Active(NAME # _D)>; + def : SVE_2_Op_Pred_Any_Predicate(NAME # _B)>; + def : SVE_2_Op_Pred_Any_Predicate(NAME # _H)>; + def : SVE_2_Op_Pred_Any_Predicate(NAME # _S)>; + def : SVE_2_Op_Pred_Any_Predicate(NAME # _D)>; } multiclass sve2_int_mul_single opc, string asm, SDPatternOperator op> { @@ -4645,10 +4654,10 @@ multiclass sve_int_arith_imm1 opc, string asm, SDPatternOperator op> { def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8_32b>; def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8_32b>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _D)>; } multiclass sve_int_arith_imm1_unsigned opc, string asm, SDPatternOperator op> { @@ -4657,10 +4666,10 @@ multiclass sve_int_arith_imm1_unsigned opc, string asm, SDPatternOperato def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>; def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _D)>; } multiclass sve_int_arith_imm2 { @@ -4669,10 +4678,10 @@ multiclass sve_int_arith_imm2 { def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8_32b>; def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8_32b>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_All_Active(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _S)>; + def : 
SVE_1_Op_Imm_Arith_Any_Predicate(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -5923,10 +5932,10 @@ multiclass sve_int_bin_cons_shift_imm_left opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _D)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _B)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _H)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _S)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _D)>; } multiclass sve_int_bin_cons_shift_imm_right opc, string asm, @@ -5943,10 +5952,10 @@ multiclass sve_int_bin_cons_shift_imm_right opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _B)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _H)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _S)>; - def : SVE_Shift_DupImm_All_Active_Pat(NAME # _D)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _B)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _H)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _S)>; + def : SVE_Shift_DupImm_Any_Predicate_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll index 8521da6..76b5df7 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll @@ -19,8 +19,8 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %b = sext <8 x i1> %a to <8 x i32> @@ -42,8 +42,8 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #61 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #61 +; CHECK-NEXT: lsl z0.d, z0.d, #61 +; CHECK-NEXT: asr z0.d, z0.d, #61 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %b = sext <4 x i3> %a to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-immediates.ll index 3ff9c90..53b56d6 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-immediates.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-immediates.ll @@ -152,7 +152,7 @@ define void @ashr_v64i8(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -168,7 +168,7 @@ define void @ashr_v32i16(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -184,7 +184,7 @@ define void @ashr_v16i32(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, 
vl16 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -200,7 +200,7 @@ define void @ashr_v8i64(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: asr z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -296,7 +296,7 @@ define void @lshr_v64i8(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsr z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -312,7 +312,7 @@ define void @lshr_v32i16(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsr z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -328,7 +328,7 @@ define void @lshr_v16i32(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsr z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -344,7 +344,7 @@ define void @lshr_v8i64(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsr z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -363,9 +363,8 @@ define void @mul_v64i8(ptr %a) #0 { ; CHECK-LABEL: mul_v64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 -; CHECK-NEXT: mov z1.b, #7 // =0x7 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: mul z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -380,9 +379,8 @@ define void @mul_v32i16(ptr %a) #0 { ; CHECK-LABEL: mul_v32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 -; CHECK-NEXT: mov z1.h, #15 // =0xf ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mul z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -397,9 +395,8 @@ define void @mul_v16i32(ptr %a) #0 { ; CHECK-LABEL: mul_v16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 -; CHECK-NEXT: mov z1.s, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mul z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -414,9 +411,8 @@ define void @mul_v8i64(ptr %a) #0 { ; CHECK-LABEL: mul_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 -; CHECK-NEXT: mov z1.d, #63 // =0x3f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mul z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -504,7 +500,7 @@ define void @shl_v64i8(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ 
-520,7 +516,7 @@ define void @shl_v32i16(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -536,7 +532,7 @@ define void @shl_v16i32(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -552,7 +548,7 @@ define void @shl_v8i64(ptr %a) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsl z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -571,9 +567,8 @@ define void @smax_v64i8(ptr %a) #0 { ; CHECK-LABEL: smax_v64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 -; CHECK-NEXT: mov z1.b, #7 // =0x7 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: smax z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -588,9 +583,8 @@ define void @smax_v32i16(ptr %a) #0 { ; CHECK-LABEL: smax_v32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 -; CHECK-NEXT: mov z1.h, #15 // =0xf ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: smax z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -605,9 +599,8 @@ define void @smax_v16i32(ptr %a) #0 { ; CHECK-LABEL: smax_v16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 -; CHECK-NEXT: mov z1.s, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: smax z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -622,9 +615,8 @@ define void @smax_v8i64(ptr %a) #0 { ; CHECK-LABEL: smax_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 -; CHECK-NEXT: mov z1.d, #63 // =0x3f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: smax z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -643,9 +635,8 @@ define void @smin_v64i8(ptr %a) #0 { ; CHECK-LABEL: smin_v64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 -; CHECK-NEXT: mov z1.b, #7 // =0x7 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: smin z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -660,9 +651,8 @@ define void @smin_v32i16(ptr %a) #0 { ; CHECK-LABEL: smin_v32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 -; CHECK-NEXT: mov z1.h, #15 // =0xf ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: smin z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -677,9 +667,8 @@ define void @smin_v16i32(ptr %a) #0 { ; CHECK-LABEL: smin_v16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 -; CHECK-NEXT: mov z1.s, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: smin z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x 
i32>, ptr %a @@ -694,9 +683,8 @@ define void @smin_v8i64(ptr %a) #0 { ; CHECK-LABEL: smin_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 -; CHECK-NEXT: mov z1.d, #63 // =0x3f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: smin z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -783,9 +771,8 @@ define void @umax_v64i8(ptr %a) #0 { ; CHECK-LABEL: umax_v64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 -; CHECK-NEXT: mov z1.b, #7 // =0x7 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: umax z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -800,9 +787,8 @@ define void @umax_v32i16(ptr %a) #0 { ; CHECK-LABEL: umax_v32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 -; CHECK-NEXT: mov z1.h, #15 // =0xf ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: umax z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -817,9 +803,8 @@ define void @umax_v16i32(ptr %a) #0 { ; CHECK-LABEL: umax_v16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 -; CHECK-NEXT: mov z1.s, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: umax z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -834,9 +819,8 @@ define void @umax_v8i64(ptr %a) #0 { ; CHECK-LABEL: umax_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 -; CHECK-NEXT: mov z1.d, #63 // =0x3f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: umax z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a @@ -855,9 +839,8 @@ define void @umin_v64i8(ptr %a) #0 { ; CHECK-LABEL: umin_v64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl64 -; CHECK-NEXT: mov z1.b, #7 // =0x7 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: umin z0.b, z0.b, #7 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i8>, ptr %a @@ -872,9 +855,8 @@ define void @umin_v32i16(ptr %a) #0 { ; CHECK-LABEL: umin_v32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl32 -; CHECK-NEXT: mov z1.h, #15 // =0xf ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: umin z0.h, z0.h, #15 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a @@ -889,9 +871,8 @@ define void @umin_v16i32(ptr %a) #0 { ; CHECK-LABEL: umin_v16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 -; CHECK-NEXT: mov z1.s, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: umin z0.s, z0.s, #31 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a @@ -906,9 +887,8 @@ define void @umin_v8i64(ptr %a) #0 { ; CHECK-LABEL: umin_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl8 -; CHECK-NEXT: mov z1.d, #63 // =0x3f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: umin z0.d, z0.d, #63 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll index bf9a477..c431438 
100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -105,16 +105,15 @@ define void @smulh_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b ; VBITS_GE_256-NEXT: sunpklo z2.h, z2.b ; VBITS_GE_256-NEXT: sunpklo z3.h, z3.b +; VBITS_GE_256-NEXT: mul z4.h, p1/m, z4.h, z6.h ; VBITS_GE_256-NEXT: mul z0.h, p1/m, z0.h, z2.h ; VBITS_GE_256-NEXT: movprfx z2, z5 ; VBITS_GE_256-NEXT: mul z2.h, p1/m, z2.h, z7.h ; VBITS_GE_256-NEXT: mul z1.h, p1/m, z1.h, z3.h -; VBITS_GE_256-NEXT: mul z4.h, p1/m, z4.h, z6.h -; VBITS_GE_256-NEXT: lsr z0.h, p1/m, z0.h, #8 -; VBITS_GE_256-NEXT: movprfx z3, z4 -; VBITS_GE_256-NEXT: lsr z3.h, p1/m, z3.h, #8 -; VBITS_GE_256-NEXT: lsr z1.h, p1/m, z1.h, #8 -; VBITS_GE_256-NEXT: lsr z2.h, p1/m, z2.h, #8 +; VBITS_GE_256-NEXT: lsr z0.h, z0.h, #8 +; VBITS_GE_256-NEXT: lsr z3.h, z4.h, #8 +; VBITS_GE_256-NEXT: lsr z1.h, z1.h, #8 +; VBITS_GE_256-NEXT: lsr z2.h, z2.h, #8 ; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b ; VBITS_GE_256-NEXT: ptrue p1.b, vl16 @@ -755,16 +754,15 @@ define void @umulh_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b ; VBITS_GE_256-NEXT: uunpklo z2.h, z2.b ; VBITS_GE_256-NEXT: uunpklo z3.h, z3.b +; VBITS_GE_256-NEXT: mul z4.h, p1/m, z4.h, z6.h ; VBITS_GE_256-NEXT: mul z0.h, p1/m, z0.h, z2.h ; VBITS_GE_256-NEXT: movprfx z2, z5 ; VBITS_GE_256-NEXT: mul z2.h, p1/m, z2.h, z7.h ; VBITS_GE_256-NEXT: mul z1.h, p1/m, z1.h, z3.h -; VBITS_GE_256-NEXT: mul z4.h, p1/m, z4.h, z6.h -; VBITS_GE_256-NEXT: lsr z0.h, p1/m, z0.h, #8 -; VBITS_GE_256-NEXT: movprfx z3, z4 -; VBITS_GE_256-NEXT: lsr z3.h, p1/m, z3.h, #8 -; VBITS_GE_256-NEXT: lsr z1.h, p1/m, z1.h, #8 -; VBITS_GE_256-NEXT: lsr z2.h, p1/m, z2.h, #8 +; VBITS_GE_256-NEXT: lsr z0.h, z0.h, #8 +; VBITS_GE_256-NEXT: lsr z3.h, z4.h, #8 +; VBITS_GE_256-NEXT: lsr z1.h, z1.h, #8 +; VBITS_GE_256-NEXT: lsr z2.h, z2.h, #8 ; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b ; VBITS_GE_256-NEXT: ptrue p1.b, vl16 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll index 4645b1f..55e235a 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve < %s | FileCheck %s -; RUN: llc -mattr=+sve2 < %s | FileCheck %s +; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefixes=CHECK,SVE1 +; RUN: llc -mattr=+sve2 < %s | FileCheck %s -check-prefixes=CHECK,SVE2 target triple = "aarch64-unknown-linux-gnu" @@ -113,10 +113,15 @@ define @add_imm_i64( %pg, ; define @mul_i8( %pg, %a, %b) { -; CHECK-LABEL: mul_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; SVE1-LABEL: mul_i8: +; SVE1: // %bb.0: +; SVE1-NEXT: mul z0.b, p0/m, z0.b, z1.b +; SVE1-NEXT: ret +; +; SVE2-LABEL: mul_i8: +; SVE2: // %bb.0: +; SVE2-NEXT: mul z0.b, z0.b, z1.b +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.mul.u.nxv16i8( %pg, %a, %b) @@ -124,10 +129,15 @@ define @mul_i8( %pg, %a, } define @mul_i16( %pg, %a, %b) { -; CHECK-LABEL: mul_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; SVE1-LABEL: mul_i16: +; SVE1: // %bb.0: +; SVE1-NEXT: mul z0.h, p0/m, z0.h, z1.h +; SVE1-NEXT: ret +; +; SVE2-LABEL: mul_i16: +; SVE2: // %bb.0: +; SVE2-NEXT: mul 
z0.h, z0.h, z1.h +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.mul.u.nxv8i16( %pg, %a, %b) @@ -135,10 +145,15 @@ define @mul_i16( %pg, %a, } define @mul_i32( %pg, %a, %b) { -; CHECK-LABEL: mul_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; SVE1-LABEL: mul_i32: +; SVE1: // %bb.0: +; SVE1-NEXT: mul z0.s, p0/m, z0.s, z1.s +; SVE1-NEXT: ret +; +; SVE2-LABEL: mul_i32: +; SVE2: // %bb.0: +; SVE2-NEXT: mul z0.s, z0.s, z1.s +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, %a, %b) @@ -146,10 +161,15 @@ define @mul_i32( %pg, %a, } define @mul_i64( %pg, %a, %b) { -; CHECK-LABEL: mul_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; SVE1-LABEL: mul_i64: +; SVE1: // %bb.0: +; SVE1-NEXT: mul z0.d, p0/m, z0.d, z1.d +; SVE1-NEXT: ret +; +; SVE2-LABEL: mul_i64: +; SVE2: // %bb.0: +; SVE2-NEXT: mul z0.d, z0.d, z1.d +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.mul.u.nxv2i64( %pg, %a, %b) @@ -163,8 +183,7 @@ define @mul_i64( %pg, %a, define @mul_imm_i8( %pg, %a) { ; CHECK-LABEL: mul_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, #3 // =0x3 -; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: mul z0.b, z0.b, #3 ; CHECK-NEXT: ret %imm = insertelement undef, i8 3, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -177,8 +196,7 @@ define @mul_imm_i8( %pg, define @mul_imm_i16( %pg, %a) { ; CHECK-LABEL: mul_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, #4 // =0x4 -; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mul z0.h, z0.h, #4 ; CHECK-NEXT: ret %imm = insertelement undef, i16 4, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -191,8 +209,7 @@ define @mul_imm_i16( %pg, define @mul_imm_i32( %pg, %a) { ; CHECK-LABEL: mul_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #5 // =0x5 -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mul z0.s, z0.s, #5 ; CHECK-NEXT: ret %imm = insertelement undef, i32 5, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -205,8 +222,7 @@ define @mul_imm_i32( %pg, define @mul_imm_i64( %pg, %a) { ; CHECK-LABEL: mul_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #6 // =0x6 -; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mul z0.d, z0.d, #6 ; CHECK-NEXT: ret %imm = insertelement undef, i64 6, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -375,8 +391,7 @@ define @smax_i64( %pg, %a define @smax_imm_i8( %pg, %a) { ; CHECK-LABEL: smax_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, #3 // =0x3 -; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: smax z0.b, z0.b, #3 ; CHECK-NEXT: ret %imm = insertelement undef, i8 3, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -389,8 +404,7 @@ define @smax_imm_i8( %pg, @smax_imm_i16( %pg, %a) { ; CHECK-LABEL: smax_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, #4 // =0x4 -; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: smax z0.h, z0.h, #4 ; CHECK-NEXT: ret %imm = insertelement undef, i16 4, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -403,8 +417,7 @@ define @smax_imm_i16( %pg, @smax_imm_i32( %pg, %a) { ; CHECK-LABEL: smax_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #5 // =0x5 -; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: smax z0.s, z0.s, #5 ; CHECK-NEXT: ret %imm = insertelement undef, i32 5, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -417,8 +430,7 @@ define @smax_imm_i32( %pg, @smax_imm_i64( %pg, %a) { ; CHECK-LABEL: smax_imm_i64: ; CHECK: 
// %bb.0: -; CHECK-NEXT: mov z1.d, #6 // =0x6 -; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: smax z0.d, z0.d, #6 ; CHECK-NEXT: ret %imm = insertelement undef, i64 6, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -483,8 +495,7 @@ define @smin_i64( %pg, %a define @smin_imm_i8( %pg, %a) { ; CHECK-LABEL: smin_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, #3 // =0x3 -; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: smin z0.b, z0.b, #3 ; CHECK-NEXT: ret %imm = insertelement undef, i8 3, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -497,8 +508,7 @@ define @smin_imm_i8( %pg, @smin_imm_i16( %pg, %a) { ; CHECK-LABEL: smin_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, #4 // =0x4 -; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: smin z0.h, z0.h, #4 ; CHECK-NEXT: ret %imm = insertelement undef, i16 4, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -511,8 +521,7 @@ define @smin_imm_i16( %pg, @smin_imm_i32( %pg, %a) { ; CHECK-LABEL: smin_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #5 // =0x5 -; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: smin z0.s, z0.s, #5 ; CHECK-NEXT: ret %imm = insertelement undef, i32 5, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -525,8 +534,7 @@ define @smin_imm_i32( %pg, @smin_imm_i64( %pg, %a) { ; CHECK-LABEL: smin_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #6 // =0x6 -; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: smin z0.d, z0.d, #6 ; CHECK-NEXT: ret %imm = insertelement undef, i64 6, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -541,10 +549,15 @@ define @smin_imm_i64( %pg, @smulh_i8( %pg, %a, %b) { -; CHECK-LABEL: smulh_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; SVE1-LABEL: smulh_i8: +; SVE1: // %bb.0: +; SVE1-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; SVE1-NEXT: ret +; +; SVE2-LABEL: smulh_i8: +; SVE2: // %bb.0: +; SVE2-NEXT: smulh z0.b, z0.b, z1.b +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.smulh.u.nxv16i8( %pg, %a, %b) @@ -552,10 +565,15 @@ define @smulh_i8( %pg, % } define @smulh_i16( %pg, %a, %b) { -; CHECK-LABEL: smulh_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; SVE1-LABEL: smulh_i16: +; SVE1: // %bb.0: +; SVE1-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; SVE1-NEXT: ret +; +; SVE2-LABEL: smulh_i16: +; SVE2: // %bb.0: +; SVE2-NEXT: smulh z0.h, z0.h, z1.h +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.smulh.u.nxv8i16( %pg, %a, %b) @@ -563,10 +581,15 @@ define @smulh_i16( %pg, % } define @smulh_i32( %pg, %a, %b) { -; CHECK-LABEL: smulh_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; SVE1-LABEL: smulh_i32: +; SVE1: // %bb.0: +; SVE1-NEXT: smulh z0.s, p0/m, z0.s, z1.s +; SVE1-NEXT: ret +; +; SVE2-LABEL: smulh_i32: +; SVE2: // %bb.0: +; SVE2-NEXT: smulh z0.s, z0.s, z1.s +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.smulh.u.nxv4i32( %pg, %a, %b) @@ -574,10 +597,15 @@ define @smulh_i32( %pg, % } define @smulh_i64( %pg, %a, %b) { -; CHECK-LABEL: smulh_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; SVE1-LABEL: smulh_i64: +; SVE1: // %bb.0: +; SVE1-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; SVE1-NEXT: ret +; +; SVE2-LABEL: smulh_i64: +; SVE2: // %bb.0: +; SVE2-NEXT: smulh z0.d, z0.d, z1.d +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.smulh.u.nxv2i64( %pg, %a, %b) @@ -951,8 +979,7 @@ define @umax_i64( %pg, %a define @umax_imm_i8( %pg, 
%a) { ; CHECK-LABEL: umax_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, #3 // =0x3 -; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: umax z0.b, z0.b, #3 ; CHECK-NEXT: ret %imm = insertelement undef, i8 3, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -965,8 +992,7 @@ define @umax_imm_i8( %pg, @umax_imm_i16( %pg, %a) { ; CHECK-LABEL: umax_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, #4 // =0x4 -; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: umax z0.h, z0.h, #4 ; CHECK-NEXT: ret %imm = insertelement undef, i16 4, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -979,8 +1005,7 @@ define @umax_imm_i16( %pg, @umax_imm_i32( %pg, %a) { ; CHECK-LABEL: umax_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #5 // =0x5 -; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: umax z0.s, z0.s, #5 ; CHECK-NEXT: ret %imm = insertelement undef, i32 5, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -993,8 +1018,7 @@ define @umax_imm_i32( %pg, @umax_imm_i64( %pg, %a) { ; CHECK-LABEL: umax_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #6 // =0x6 -; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: umax z0.d, z0.d, #6 ; CHECK-NEXT: ret %imm = insertelement undef, i64 6, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -1059,8 +1083,7 @@ define @umin_i64( %pg, %a define @umin_imm_i8( %pg, %a) { ; CHECK-LABEL: umin_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, #3 // =0x3 -; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: umin z0.b, z0.b, #3 ; CHECK-NEXT: ret %imm = insertelement undef, i8 3, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -1073,8 +1096,7 @@ define @umin_imm_i8( %pg, @umin_imm_i16( %pg, %a) { ; CHECK-LABEL: umin_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, #4 // =0x4 -; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: umin z0.h, z0.h, #4 ; CHECK-NEXT: ret %imm = insertelement undef, i16 4, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -1087,8 +1109,7 @@ define @umin_imm_i16( %pg, @umin_imm_i32( %pg, %a) { ; CHECK-LABEL: umin_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #5 // =0x5 -; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: umin z0.s, z0.s, #5 ; CHECK-NEXT: ret %imm = insertelement undef, i32 5, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -1101,8 +1122,7 @@ define @umin_imm_i32( %pg, @umin_imm_i64( %pg, %a) { ; CHECK-LABEL: umin_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #6 // =0x6 -; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: umin z0.d, z0.d, #6 ; CHECK-NEXT: ret %imm = insertelement undef, i64 6, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -1117,10 +1137,15 @@ define @umin_imm_i64( %pg, @umulh_i8( %pg, %a, %b) { -; CHECK-LABEL: umulh_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; SVE1-LABEL: umulh_i8: +; SVE1: // %bb.0: +; SVE1-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; SVE1-NEXT: ret +; +; SVE2-LABEL: umulh_i8: +; SVE2: // %bb.0: +; SVE2-NEXT: umulh z0.b, z0.b, z1.b +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.umulh.u.nxv16i8( %pg, %a, %b) @@ -1128,10 +1153,15 @@ define @umulh_i8( %pg, % } define @umulh_i16( %pg, %a, %b) { -; CHECK-LABEL: umulh_i16: -; CHECK: // %bb.0: -; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; SVE1-LABEL: umulh_i16: +; SVE1: // %bb.0: +; SVE1-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; SVE1-NEXT: ret +; +; SVE2-LABEL: umulh_i16: +; SVE2: // %bb.0: +; 
SVE2-NEXT: umulh z0.h, z0.h, z1.h +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.umulh.u.nxv8i16( %pg, %a, %b) @@ -1139,10 +1169,15 @@ define @umulh_i16( %pg, % } define @umulh_i32( %pg, %a, %b) { -; CHECK-LABEL: umulh_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; SVE1-LABEL: umulh_i32: +; SVE1: // %bb.0: +; SVE1-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; SVE1-NEXT: ret +; +; SVE2-LABEL: umulh_i32: +; SVE2: // %bb.0: +; SVE2-NEXT: umulh z0.s, z0.s, z1.s +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.umulh.u.nxv4i32( %pg, %a, %b) @@ -1150,10 +1185,15 @@ define @umulh_i32( %pg, % } define @umulh_i64( %pg, %a, %b) { -; CHECK-LABEL: umulh_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; SVE1-LABEL: umulh_i64: +; SVE1: // %bb.0: +; SVE1-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; SVE1-NEXT: ret +; +; SVE2-LABEL: umulh_i64: +; SVE2: // %bb.0: +; SVE2-NEXT: umulh z0.d, z0.d, z1.d +; SVE2-NEXT: ret %out = call @llvm.aarch64.sve.umulh.u.nxv2i64( %pg, %a, %b) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll index 6f06fe8..70c2e52 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-undef.ll @@ -58,7 +58,7 @@ define @asr_i64( %pg, %a, define @asr_imm_i8( %pg, %a) { ; CHECK-LABEL: asr_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #3 +; CHECK-NEXT: asr z0.b, z0.b, #3 ; CHECK-NEXT: ret %imm = insertelement undef, i8 3, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -71,7 +71,7 @@ define @asr_imm_i8( %pg, define @asr_imm_i16( %pg, %a) { ; CHECK-LABEL: asr_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #4 +; CHECK-NEXT: asr z0.h, z0.h, #4 ; CHECK-NEXT: ret %imm = insertelement undef, i16 4, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -84,7 +84,7 @@ define @asr_imm_i16( %pg, define @asr_imm_i32( %pg, %a) { ; CHECK-LABEL: asr_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #5 +; CHECK-NEXT: asr z0.s, z0.s, #5 ; CHECK-NEXT: ret %imm = insertelement undef, i32 5, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -97,7 +97,7 @@ define @asr_imm_i32( %pg, define @asr_imm_i64( %pg, %a) { ; CHECK-LABEL: asr_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #6 +; CHECK-NEXT: asr z0.d, z0.d, #6 ; CHECK-NEXT: ret %imm = insertelement undef, i64 6, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -162,7 +162,7 @@ define @lsl_i64( %pg, %a, define @lsl_imm_i8( %pg, %a) { ; CHECK-LABEL: lsl_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 ; CHECK-NEXT: ret %imm = insertelement undef, i8 7, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -175,7 +175,7 @@ define @lsl_imm_i8( %pg, define @lsl_imm_i16( %pg, %a) { ; CHECK-LABEL: lsl_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 ; CHECK-NEXT: ret %imm = insertelement undef, i16 8, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -188,7 +188,7 @@ define @lsl_imm_i16( %pg, define @lsl_imm_i32( %pg, %a) { ; CHECK-LABEL: lsl_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #9 +; CHECK-NEXT: lsl z0.s, z0.s, #9 ; CHECK-NEXT: ret %imm = insertelement undef, i32 9, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -201,7 +201,7 @@ define @lsl_imm_i32( %pg, 
define @lsl_imm_i64( %pg, %a) { ; CHECK-LABEL: lsl_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #10 +; CHECK-NEXT: lsl z0.d, z0.d, #10 ; CHECK-NEXT: ret %imm = insertelement undef, i64 10, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -266,7 +266,7 @@ define @lsr_i64( %pg, %a, define @lsr_imm_i8( %pg, %a) { ; CHECK-LABEL: lsr_imm_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: lsr z0.b, z0.b, #8 ; CHECK-NEXT: ret %imm = insertelement undef, i8 8, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -279,7 +279,7 @@ define @lsr_imm_i8( %pg, define @lsr_imm_i16( %pg, %a) { ; CHECK-LABEL: lsr_imm_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #12 +; CHECK-NEXT: lsr z0.h, z0.h, #12 ; CHECK-NEXT: ret %imm = insertelement undef, i16 12, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -292,7 +292,7 @@ define @lsr_imm_i16( %pg, define @lsr_imm_i32( %pg, %a) { ; CHECK-LABEL: lsr_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #13 +; CHECK-NEXT: lsr z0.s, z0.s, #13 ; CHECK-NEXT: ret %imm = insertelement undef, i32 13, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer @@ -305,7 +305,7 @@ define @lsr_imm_i32( %pg, define @lsr_imm_i64( %pg, %a) { ; CHECK-LABEL: lsr_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #14 +; CHECK-NEXT: lsr z0.d, z0.d, #14 ; CHECK-NEXT: ret %imm = insertelement undef, i64 14, i32 0 %imm.splat = shufflevector %imm, undef, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll index bb4d70e..e6bae72 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll @@ -12,14 +12,13 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask ; CHECK-NEXT: mov z3.s, z2.s[1] ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: strh w8, [sp, #8] ; CHECK-NEXT: strh w9, [sp, #10] ; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 -; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: lsl z2.h, z2.h, #15 +; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -34,11 +33,10 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask ; CHECK-LABEL: select_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 -; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: lsl z2.h, z2.h, #15 +; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -52,12 +50,11 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask ; CHECK-LABEL: select_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.h, z2.b -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 -; CHECK-NEXT: 
asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: lsl z2.h, z2.h, #15 +; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -97,11 +94,10 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m ; CHECK-LABEL: select_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 -; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: lsl z2.s, z2.s, #31 +; CHECK-NEXT: asr z2.s, z2.s, #31 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -115,12 +111,11 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m ; CHECK-LABEL: select_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 -; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: lsl z2.s, z2.s, #31 +; CHECK-NEXT: asr z2.s, z2.s, #31 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -179,12 +174,11 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> ; CHECK-LABEL: select_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #63 -; CHECK-NEXT: asr z2.d, p0/m, z2.d, #63 +; CHECK-NEXT: lsl z2.d, z2.d, #63 +; CHECK-NEXT: asr z2.d, z2.d, #63 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll index 7a7d175..4b4c24a 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll @@ -589,8 +589,8 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 +; CHECK-NEXT: asr z0.h, z0.h, #8 ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -642,8 +642,8 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: asr z0.s, z0.s, #16 ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll index c915351..219ba01 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ 
-12,14 +12,13 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: lsl z0.h, z0.h, #8 +; CHECK-NEXT: lsl z1.h, z1.h, #8 +; CHECK-NEXT: asr z0.h, z0.h, #8 +; CHECK-NEXT: asr z1.h, z1.h, #8 ; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sunpklo z0.s, z0.h -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -190,10 +189,10 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #16 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #16 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: asr z1.s, z1.s, #16 +; CHECK-NEXT: asr z0.s, z0.s, #16 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -741,12 +740,12 @@ define void @udiv_constantsplat_v8i32(ptr %a) #0 { ; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: sub z0.s, z0.s, z3.s ; CHECK-NEXT: sub z1.s, z1.s, z2.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #1 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #1 +; CHECK-NEXT: lsr z0.s, z0.s, #1 +; CHECK-NEXT: lsr z1.s, z1.s, #1 ; CHECK-NEXT: add z0.s, z0.s, z3.s ; CHECK-NEXT: add z1.s, z1.s, z2.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #6 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #6 +; CHECK-NEXT: lsr z0.s, z0.s, #6 +; CHECK-NEXT: lsr z1.s, z1.s, #6 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll index a59306f..e9a64e7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -14,15 +14,14 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) #0 { ; CHECK-LABEL: sext_v8i1_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z1.s, z1.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z1.s, z1.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %b = sext <8 x i1> %a to <8 x i32> @@ -41,15 +40,14 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) #0 { ; CHECK-LABEL: sext_v4i3_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #61 -; CHECK-NEXT: lsl z0.d, p0/m, 
z0.d, #61 -; CHECK-NEXT: asr z1.d, p0/m, z1.d, #61 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #61 +; CHECK-NEXT: lsl z1.d, z1.d, #61 +; CHECK-NEXT: lsl z0.d, z0.d, #61 +; CHECK-NEXT: asr z1.d, z1.d, #61 +; CHECK-NEXT: asr z0.d, z0.d, #61 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %b = sext <4 x i3> %a to <4 x i64> @@ -185,15 +183,14 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) #0 { ; CHECK-LABEL: sext_v4i8_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #56 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #56 -; CHECK-NEXT: asr z1.d, p0/m, z1.d, #56 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #56 +; CHECK-NEXT: lsl z1.d, z1.d, #56 +; CHECK-NEXT: lsl z0.d, z0.d, #56 +; CHECK-NEXT: asr z1.d, z1.d, #56 +; CHECK-NEXT: asr z0.d, z0.d, #56 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %b = sext <4 x i8> %a to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll index 490b96c..05c91065 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll @@ -153,9 +153,8 @@ define void @ashr_v32i8(ptr %a) #0 { ; CHECK-LABEL: ashr_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 +; CHECK-NEXT: asr z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -170,9 +169,8 @@ define void @ashr_v16i16(ptr %a) #0 { ; CHECK-LABEL: ashr_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 +; CHECK-NEXT: asr z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -187,9 +185,8 @@ define void @ashr_v8i32(ptr %a) #0 { ; CHECK-LABEL: ashr_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 +; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -204,9 +201,8 @@ define void @ashr_v4i64(ptr %a) #0 { ; CHECK-LABEL: ashr_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: asr z0.d, z0.d, #63 +; CHECK-NEXT: asr z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -309,9 +305,8 @@ define void @lshr_v32i8(ptr %a) #0 { ; CHECK-LABEL: lshr_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: lsr z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: lsr z0.b, z0.b, #7 +; CHECK-NEXT: lsr z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -326,9 +321,8 @@ define void @lshr_v16i16(ptr %a) #0 { ; CHECK-LABEL: lshr_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.h, 
vl8 -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: lsr z0.h, z0.h, #15 +; CHECK-NEXT: lsr z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -343,9 +337,8 @@ define void @lshr_v8i32(ptr %a) #0 { ; CHECK-LABEL: lshr_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: lsr z0.s, z0.s, #31 +; CHECK-NEXT: lsr z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -360,9 +353,8 @@ define void @lshr_v4i64(ptr %a) #0 { ; CHECK-LABEL: lshr_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: lsr z0.d, z0.d, #63 +; CHECK-NEXT: lsr z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -381,10 +373,8 @@ define void @mul_v32i8(ptr %a) #0 { ; CHECK-LABEL: mul_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.b, #7 // =0x7 -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: mul z0.b, z0.b, #7 +; CHECK-NEXT: mul z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -399,10 +389,8 @@ define void @mul_v16i16(ptr %a) #0 { ; CHECK-LABEL: mul_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.h, #15 // =0xf -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: mul z0.h, z0.h, #15 +; CHECK-NEXT: mul z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -417,10 +405,8 @@ define void @mul_v8i32(ptr %a) #0 { ; CHECK-LABEL: mul_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.s, #31 // =0x1f -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: mul z0.s, z0.s, #31 +; CHECK-NEXT: mul z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -435,10 +421,8 @@ define void @mul_v4i64(ptr %a) #0 { ; CHECK-LABEL: mul_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.d, #63 // =0x3f -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: mul z0.d, z0.d, #63 +; CHECK-NEXT: mul z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -525,9 +509,8 @@ define void @shl_v32i8(ptr %a) #0 { ; CHECK-LABEL: shl_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: lsl z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -542,9 +525,8 @@ define void @shl_v16i16(ptr %a) #0 { ; CHECK-LABEL: shl_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: lsl z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a 
@@ -559,9 +541,8 @@ define void @shl_v8i32(ptr %a) #0 { ; CHECK-LABEL: shl_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -576,9 +557,8 @@ define void @shl_v4i64(ptr %a) #0 { ; CHECK-LABEL: shl_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: lsl z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -597,10 +577,8 @@ define void @smax_v32i8(ptr %a) #0 { ; CHECK-LABEL: smax_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.b, #7 // =0x7 -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: smax z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: smax z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: smax z0.b, z0.b, #7 +; CHECK-NEXT: smax z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -615,10 +593,8 @@ define void @smax_v16i16(ptr %a) #0 { ; CHECK-LABEL: smax_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.h, #15 // =0xf -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: smax z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: smax z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: smax z0.h, z0.h, #15 +; CHECK-NEXT: smax z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -633,10 +609,8 @@ define void @smax_v8i32(ptr %a) #0 { ; CHECK-LABEL: smax_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.s, #31 // =0x1f -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: smax z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: smax z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: smax z0.s, z0.s, #31 +; CHECK-NEXT: smax z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -651,10 +625,8 @@ define void @smax_v4i64(ptr %a) #0 { ; CHECK-LABEL: smax_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.d, #63 // =0x3f -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: smax z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: smax z0.d, z0.d, #63 +; CHECK-NEXT: smax z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -673,10 +645,8 @@ define void @smin_v32i8(ptr %a) #0 { ; CHECK-LABEL: smin_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.b, #7 // =0x7 -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: smin z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: smin z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: smin z0.b, z0.b, #7 +; CHECK-NEXT: smin z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -691,10 +661,8 @@ define void @smin_v16i16(ptr %a) #0 { ; CHECK-LABEL: smin_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.h, #15 // =0xf -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: smin z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: smin z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: smin z0.h, z0.h, #15 +; CHECK-NEXT: smin z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -709,10 +677,8 @@ define void @smin_v8i32(ptr %a) #0 { ; CHECK-LABEL: smin_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp 
q0, q1, [x0] -; CHECK-NEXT: mov z2.s, #31 // =0x1f -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: smin z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: smin z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: smin z0.s, z0.s, #31 +; CHECK-NEXT: smin z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -727,10 +693,8 @@ define void @smin_v4i64(ptr %a) #0 { ; CHECK-LABEL: smin_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.d, #63 // =0x3f -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: smin z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: smin z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: smin z0.d, z0.d, #63 +; CHECK-NEXT: smin z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -817,10 +781,8 @@ define void @umax_v32i8(ptr %a) #0 { ; CHECK-LABEL: umax_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.b, #7 // =0x7 -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: umax z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: umax z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: umax z0.b, z0.b, #7 +; CHECK-NEXT: umax z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -835,10 +797,8 @@ define void @umax_v16i16(ptr %a) #0 { ; CHECK-LABEL: umax_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.h, #15 // =0xf -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: umax z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: umax z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: umax z0.h, z0.h, #15 +; CHECK-NEXT: umax z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -853,10 +813,8 @@ define void @umax_v8i32(ptr %a) #0 { ; CHECK-LABEL: umax_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.s, #31 // =0x1f -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: umax z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: umax z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: umax z0.s, z0.s, #31 +; CHECK-NEXT: umax z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -871,10 +829,8 @@ define void @umax_v4i64(ptr %a) #0 { ; CHECK-LABEL: umax_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.d, #63 // =0x3f -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: umax z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: umax z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: umax z0.d, z0.d, #63 +; CHECK-NEXT: umax z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -893,10 +849,8 @@ define void @umin_v32i8(ptr %a) #0 { ; CHECK-LABEL: umin_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.b, #7 // =0x7 -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: umin z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: umin z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: umin z0.b, z0.b, #7 +; CHECK-NEXT: umin z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -911,10 +865,8 @@ define void @umin_v16i16(ptr %a) #0 { ; CHECK-LABEL: umin_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.h, #15 // =0xf -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: umin z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: umin z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: umin z0.h, z0.h, #15 +; CHECK-NEXT: umin z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -929,10 +881,8 @@ define void @umin_v8i32(ptr %a) #0 { ; CHECK-LABEL: umin_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: 
mov z2.s, #31 // =0x1f -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: umin z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: umin z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: umin z0.s, z0.s, #31 +; CHECK-NEXT: umin z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -947,10 +897,8 @@ define void @umin_v4i64(ptr %a) #0 { ; CHECK-LABEL: umin_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z2.d, #63 // =0x3f -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: umin z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: umin z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: umin z0.d, z0.d, #63 +; CHECK-NEXT: umin z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll index c560516..38b8b40 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -16,12 +16,12 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 +; CHECK-NEXT: lsl z1.h, z1.h, #8 +; CHECK-NEXT: asr z0.h, z0.h, #8 +; CHECK-NEXT: asr z1.h, z1.h, #8 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #4 +; CHECK-NEXT: lsr z0.h, z0.h, #4 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> undef, i16 4, i64 0 @@ -94,10 +94,10 @@ define void @smulh_v32i8(ptr %a, ptr %b) #0 { ; CHECK-NEXT: mul z2.h, p0/m, z2.h, z7.h ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: mul z3.h, p0/m, z3.h, z6.h -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8 -; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8 -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsr z1.h, z1.h, #8 +; CHECK-NEXT: lsr z3.h, z3.h, #8 +; CHECK-NEXT: lsr z2.h, z2.h, #8 +; CHECK-NEXT: lsr z0.h, z0.h, #8 ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b @@ -124,12 +124,12 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #16 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: asr z0.s, z0.s, #16 +; CHECK-NEXT: asr z1.s, z1.s, #16 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %1 = sext <2 x i16> %op1 to <2 x i32> @@ -366,7 +366,7 @@ define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #4 +; CHECK-NEXT: lsr z0.h, z0.h, #4 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %1 = zext <4 x i8> %op1 to <4 x i16> @@ -435,10 +435,10 @@ define void 
@umulh_v32i8(ptr %a, ptr %b) #0 { ; CHECK-NEXT: mul z2.h, p0/m, z2.h, z7.h ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: mul z3.h, p0/m, z3.h, z6.h -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8 -; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8 -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsr z1.h, z1.h, #8 +; CHECK-NEXT: lsr z3.h, z3.h, #8 +; CHECK-NEXT: lsr z2.h, z2.h, #8 +; CHECK-NEXT: lsr z0.h, z0.h, #8 ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b @@ -468,7 +468,7 @@ define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %1 = zext <2 x i16> %op1 to <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll index 21b4230..8cecd75 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -12,15 +12,15 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ptrue p1.s, vl4 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: lsl z0.h, z0.h, #8 +; CHECK-NEXT: lsl z1.h, z1.h, #8 +; CHECK-NEXT: asr z0.h, z0.h, #8 +; CHECK-NEXT: asr z1.h, z1.h, #8 ; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h -; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll index 6ae31cd..4fc3699 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll @@ -11,11 +11,11 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: ashr_v4i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: lsl z0.h, z0.h, #8 ; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: asr z0.h, z0.h, #8 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -70,11 +70,11 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: ashr_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: lsl z0.s, z0.s, #16 ; CHECK-NEXT: and z1.s, z1.s, #0xffff -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 +; 
CHECK-NEXT: asr z0.s, z0.s, #16 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index ab0c348..e6257c7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -642,8 +642,8 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: asr z0.s, z0.s, #16 ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -715,10 +715,9 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) #0 { ; CHECK-LABEL: scvtf_v2i16_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: asr z0.s, z0.s, #16 ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll index 4f36876..e9e96cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -7,11 +7,10 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) #0 { ; CHECK-LABEL: select_v4i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 -; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: lsl z2.h, z2.h, #15 +; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -25,11 +24,10 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 { ; CHECK-LABEL: select_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.b, p0/m, z2.b, #7 -; CHECK-NEXT: asr z2.b, p0/m, z2.b, #7 +; CHECK-NEXT: lsl z2.b, z2.b, #7 +; CHECK-NEXT: asr z2.b, z2.b, #7 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -43,11 +41,10 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) ; CHECK-LABEL: select_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 -; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: lsl z2.b, p0/m, z2.b, #7 -; CHECK-NEXT: asr z2.b, p0/m, z2.b, #7 +; CHECK-NEXT: lsl z2.b, z2.b, #7 +; CHECK-NEXT: asr z2.b, z2.b, #7 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -87,11 
+84,10 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) # ; CHECK-LABEL: select_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 -; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: lsl z2.s, z2.s, #31 +; CHECK-NEXT: asr z2.s, z2.s, #31 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -105,11 +101,10 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) # ; CHECK-LABEL: select_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 -; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: lsl z2.h, z2.h, #15 +; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -123,12 +118,11 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) # ; CHECK-LABEL: select_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.h, z2.b -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 -; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: lsl z2.h, z2.h, #15 +; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -168,11 +162,10 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) # ; CHECK-LABEL: select_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 -; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: lsl z2.s, z2.s, #31 +; CHECK-NEXT: asr z2.s, z2.s, #31 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -186,12 +179,11 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) # ; CHECK-LABEL: select_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 -; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: lsl z2.s, z2.s, #31 +; CHECK-NEXT: asr z2.s, z2.s, #31 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -250,12 +242,11 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) # ; CHECK-LABEL: select_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #63 -; CHECK-NEXT: asr z2.d, p0/m, z2.d, #63 +; CHECK-NEXT: lsl z2.d, z2.d, #63 +; CHECK-NEXT: asr z2.d, z2.d, #63 ; CHECK-NEXT: bic z1.d, z1.d, z2.d ; 
CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll index b417276..26b72b5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -12,8 +12,8 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -27,8 +27,8 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -42,8 +42,8 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -117,10 +117,10 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) #0 { ; CHECK-NEXT: strb w2, [sp, #1] ; CHECK-NEXT: strb w1, [sp] ; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7 -; CHECK-NEXT: asr z1.b, p0/m, z1.b, #7 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z1.b, z1.b, #7 +; CHECK-NEXT: asr z1.b, z1.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] @@ -147,8 +147,8 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) #0 { ; CHECK-NEXT: strh w8, [sp, #8] ; CHECK-NEXT: strh w9, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -163,8 +163,8 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -179,8 +179,8 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: uunpklo z0.h, 
z0.b -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -194,14 +194,14 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z1.h, z0.b -; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: mov x8, #8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: lsl z1.h, z1.h, #15 ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: mov x8, #8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z1.h, z1.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0] @@ -218,8 +218,8 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -234,8 +234,8 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -277,10 +277,10 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) #0 { ; CHECK-NEXT: ldp d0, d1, [sp] ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: lsl z1.s, z1.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 +; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0] @@ -299,8 +299,8 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: asr z0.d, z0.d, #63 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -313,16 +313,16 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov x8, #2 +; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; 
CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsl z1.d, z1.d, #63 +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: asr z1.d, z1.d, #63 +; CHECK-NEXT: asr z0.d, z0.d, #63 ; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll index 32b8112..cefa8e2 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll @@ -12,8 +12,8 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1b { z0.h }, p0, [x0] @@ -27,8 +27,8 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] @@ -42,8 +42,8 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] @@ -117,12 +117,11 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) #0 { ; CHECK-NEXT: strb w8, [sp, #16] ; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: ldp q0, q1, [sp] -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 +; CHECK-NEXT: lsl z1.b, z1.b, #7 ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, z1.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0, x8] @@ -147,8 +146,8 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) #0 { ; CHECK-NEXT: strh w8, [sp, #8] ; CHECK-NEXT: strh w9, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -163,8 +162,8 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; 
CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -179,8 +178,8 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -194,15 +193,15 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: mov x8, #8 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: mov x8, #8 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: lsl z1.h, z1.h, #15 +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: asr z1.h, z1.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 ; CHECK-NEXT: mov z1.h, #0 // =0x0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 @@ -219,8 +218,8 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] @@ -237,40 +236,40 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.b, z0.b[7] ; CHECK-NEXT: mov z2.b, z0.b[6] -; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: mov z1.b, z0.b[5] -; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: fmov w10, s2 ; CHECK-NEXT: mov z2.b, z0.b[4] -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: mov z2.b, z0.b[3] -; CHECK-NEXT: strh w10, [sp, #10] +; CHECK-NEXT: fmov w11, s1 ; CHECK-NEXT: mov z3.b, z0.b[2] -; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w9, [sp, #14] +; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: strh w10, [sp, #12] +; CHECK-NEXT: mov z2.b, z0.b[3] +; CHECK-NEXT: strh w11, [sp, #10] ; CHECK-NEXT: mov z4.b, z0.b[1] +; CHECK-NEXT: strh w9, [sp, #8] +; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: ldr d1, [sp, #8] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov x9, #4 +; CHECK-NEXT: mov x8, #4 +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: fmov w10, s2 ; CHECK-NEXT: uunpklo z0.s, z1.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 ; CHECK-NEXT: mov z0.s, #0 // =0x0 -; CHECK-NEXT: st1w { z0.s }, p1, [x0, x9, lsl #2] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: fmov w8, s4 +; CHECK-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strh w9, [sp] +; CHECK-NEXT: fmov w9, s4 ; CHECK-NEXT: strh 
w10, [sp, #6] -; CHECK-NEXT: strh w9, [sp, #4] -; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: strh w9, [sp, #2] ; CHECK-NEXT: ldr d1, [sp] ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: lsl z1.s, z1.s, #31 +; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 @@ -285,8 +284,8 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: asr z0.d, z0.d, #63 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] @@ -299,16 +298,16 @@ define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov x8, #2 +; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 -; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63 -; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsl z1.d, z1.d, #63 +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: asr z1.d, z1.d, #63 +; CHECK-NEXT: asr z0.d, z0.d, #63 ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0 ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll index aed6f5d..03d4118 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll @@ -13,7 +13,7 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsr z0.h, z0.h, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op) @@ -65,7 +65,7 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op) @@ -197,20 +197,16 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) #0 { ; CHECK-LABEL: bswap_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: lsr z1.s, z0.s, #24 +; CHECK-NEXT: lsr z2.s, z0.s, #8 +; CHECK-NEXT: lsl z3.s, z0.s, #24 ; CHECK-NEXT: and z0.s, z0.s, #0xff00 ; CHECK-NEXT: and z2.s, z2.s, #0xff00 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: lsl z0.s, z0.s, #8 ; CHECK-NEXT: orr z1.d, 
z2.d, z1.d ; CHECK-NEXT: orr z0.d, z3.d, z0.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op) @@ -221,10 +217,8 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 { ; CHECK-LABEL: bswap_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsr z1.h, z0.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -236,10 +230,8 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 { ; CHECK-LABEL: bswap_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsr z1.h, z0.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -251,14 +243,11 @@ define void @bswap_v16i16(ptr %a) #0 { ; CHECK-LABEL: bswap_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8 -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsr z3.h, z1.h, #8 +; CHECK-NEXT: lsl z1.h, z1.h, #8 ; CHECK-NEXT: orr z1.d, z1.d, z3.d -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsr z2.h, z0.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret @@ -272,16 +261,12 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 { ; CHECK-LABEL: bswap_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: lsr z1.s, z0.s, #24 +; CHECK-NEXT: lsr z2.s, z0.s, #8 +; CHECK-NEXT: lsl z3.s, z0.s, #24 ; CHECK-NEXT: and z0.s, z0.s, #0xff00 ; CHECK-NEXT: and z2.s, z2.s, #0xff00 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: lsl z0.s, z0.s, #8 ; CHECK-NEXT: orr z1.d, z2.d, z1.d ; CHECK-NEXT: orr z0.d, z3.d, z0.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -295,16 +280,12 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 { ; CHECK-LABEL: bswap_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: lsr z1.s, z0.s, #24 +; CHECK-NEXT: lsr z2.s, z0.s, #8 +; CHECK-NEXT: lsl z3.s, z0.s, #24 ; CHECK-NEXT: and z0.s, z0.s, #0xff00 ; CHECK-NEXT: and z2.s, z2.s, #0xff00 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: lsl z0.s, z0.s, #8 ; CHECK-NEXT: orr z1.d, z2.d, z1.d ; CHECK-NEXT: orr z0.d, z3.d, z0.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -318,27 +299,20 @@ define void @bswap_v8i32(ptr %a) #0 { ; CHECK-LABEL: bswap_v8i32: ; CHECK: // %bb.0: ; 
CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsr z3.s, p0/m, z3.s, #8 -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: lsr z5.s, p0/m, z5.s, #8 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24 -; CHECK-NEXT: and z3.s, z3.s, #0xff00 +; CHECK-NEXT: lsr z5.s, z1.s, #8 +; CHECK-NEXT: lsr z4.s, z1.s, #24 ; CHECK-NEXT: and z5.s, z5.s, #0xff00 -; CHECK-NEXT: orr z2.d, z3.d, z2.d -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: lsr z3.s, z0.s, #8 +; CHECK-NEXT: lsr z2.s, z0.s, #24 +; CHECK-NEXT: and z3.s, z3.s, #0xff00 ; CHECK-NEXT: orr z4.d, z5.d, z4.d -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: lsl z5.s, p0/m, z5.s, #24 +; CHECK-NEXT: orr z2.d, z3.d, z2.d +; CHECK-NEXT: lsl z3.s, z0.s, #24 +; CHECK-NEXT: lsl z5.s, z1.s, #24 ; CHECK-NEXT: and z1.s, z1.s, #0xff00 ; CHECK-NEXT: and z0.s, z0.s, #0xff00 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #8 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: lsl z1.s, z1.s, #8 +; CHECK-NEXT: lsl z0.s, z0.s, #8 ; CHECK-NEXT: orr z1.d, z5.d, z1.d ; CHECK-NEXT: orr z0.d, z3.d, z0.d ; CHECK-NEXT: orr z1.d, z1.d, z4.d @@ -355,33 +329,26 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 { ; CHECK-LABEL: bswap_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.d, vl1 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #8 -; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: lsr z2.d, z0.d, #40 +; CHECK-NEXT: lsr z1.d, z0.d, #56 +; CHECK-NEXT: lsr z3.d, z0.d, #24 +; CHECK-NEXT: lsr z4.d, z0.d, #8 ; CHECK-NEXT: and z2.d, z2.d, #0xff00 -; CHECK-NEXT: and z3.d, z3.d, #0xff0000 +; CHECK-NEXT: mov z5.d, z0.d ; CHECK-NEXT: orr z1.d, z2.d, z1.d ; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: and z3.d, z3.d, #0xff0000 ; CHECK-NEXT: and z4.d, z4.d, #0xff000000 -; CHECK-NEXT: and z5.d, z5.d, #0xff000000 ; CHECK-NEXT: orr z3.d, z4.d, z3.d +; CHECK-NEXT: and z5.d, z5.d, #0xff000000 ; CHECK-NEXT: and z2.d, z2.d, #0xff0000 -; CHECK-NEXT: movprfx z4, z5 -; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #8 -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: lsl z5.d, p0/m, z5.d, #56 +; CHECK-NEXT: lsl z4.d, z0.d, #56 ; CHECK-NEXT: and z0.d, z0.d, #0xff00 -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #24 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40 -; CHECK-NEXT: orr z2.d, z2.d, z4.d -; CHECK-NEXT: orr z0.d, z5.d, z0.d +; CHECK-NEXT: lsl z5.d, z5.d, #8 +; CHECK-NEXT: lsl z2.d, z2.d, #24 +; CHECK-NEXT: lsl z0.d, z0.d, #40 +; CHECK-NEXT: orr z2.d, z2.d, z5.d +; CHECK-NEXT: orr z0.d, z4.d, z0.d ; CHECK-NEXT: orr z1.d, z3.d, z1.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -395,33 +362,26 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 { ; CHECK-LABEL: bswap_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #8 -; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: lsr z2.d, z0.d, #40 +; CHECK-NEXT: lsr z1.d, z0.d, #56 +; 
CHECK-NEXT: lsr z3.d, z0.d, #24 +; CHECK-NEXT: lsr z4.d, z0.d, #8 ; CHECK-NEXT: and z2.d, z2.d, #0xff00 -; CHECK-NEXT: and z3.d, z3.d, #0xff0000 +; CHECK-NEXT: mov z5.d, z0.d ; CHECK-NEXT: orr z1.d, z2.d, z1.d ; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: and z3.d, z3.d, #0xff0000 ; CHECK-NEXT: and z4.d, z4.d, #0xff000000 -; CHECK-NEXT: and z5.d, z5.d, #0xff000000 ; CHECK-NEXT: orr z3.d, z4.d, z3.d +; CHECK-NEXT: and z5.d, z5.d, #0xff000000 ; CHECK-NEXT: and z2.d, z2.d, #0xff0000 -; CHECK-NEXT: movprfx z4, z5 -; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #8 -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: lsl z5.d, p0/m, z5.d, #56 +; CHECK-NEXT: lsl z4.d, z0.d, #56 ; CHECK-NEXT: and z0.d, z0.d, #0xff00 -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #24 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40 -; CHECK-NEXT: orr z2.d, z2.d, z4.d -; CHECK-NEXT: orr z0.d, z5.d, z0.d +; CHECK-NEXT: lsl z5.d, z5.d, #8 +; CHECK-NEXT: lsl z2.d, z2.d, #24 +; CHECK-NEXT: lsl z0.d, z0.d, #40 +; CHECK-NEXT: orr z2.d, z2.d, z5.d +; CHECK-NEXT: orr z0.d, z4.d, z0.d ; CHECK-NEXT: orr z1.d, z3.d, z1.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -435,64 +395,51 @@ define void @bswap_v4i64(ptr %a) #0 { ; CHECK-LABEL: bswap_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #40 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #24 -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #8 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56 +; CHECK-NEXT: lsr z3.d, z0.d, #40 +; CHECK-NEXT: lsr z4.d, z0.d, #24 +; CHECK-NEXT: lsr z5.d, z0.d, #8 +; CHECK-NEXT: mov z6.d, z0.d +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: lsr z2.d, z0.d, #56 ; CHECK-NEXT: and z3.d, z3.d, #0xff00 ; CHECK-NEXT: and z4.d, z4.d, #0xff0000 ; CHECK-NEXT: and z5.d, z5.d, #0xff000000 -; CHECK-NEXT: orr z2.d, z3.d, z2.d -; CHECK-NEXT: orr z3.d, z5.d, z4.d -; CHECK-NEXT: mov z6.d, z0.d -; CHECK-NEXT: mov z7.d, z0.d -; CHECK-NEXT: orr z2.d, z3.d, z2.d ; CHECK-NEXT: and z6.d, z6.d, #0xff000000 ; CHECK-NEXT: and z7.d, z7.d, #0xff0000 -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: lsl z3.d, p0/m, z3.d, #8 -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #24 -; CHECK-NEXT: orr z3.d, z4.d, z3.d -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #40 -; CHECK-NEXT: movprfx z16, z0 -; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56 +; CHECK-NEXT: orr z2.d, z3.d, z2.d +; CHECK-NEXT: lsr z3.d, z1.d, #40 +; CHECK-NEXT: orr z4.d, z5.d, z4.d +; CHECK-NEXT: lsl z5.d, z6.d, #8 +; CHECK-NEXT: lsl z6.d, z7.d, #24 +; CHECK-NEXT: lsl z16.d, z0.d, #56 ; CHECK-NEXT: and z0.d, z0.d, #0xff00 -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #56 -; CHECK-NEXT: and z4.d, z4.d, #0xff00 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40 -; CHECK-NEXT: orr z4.d, z4.d, z5.d -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #24 -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: lsr z7.d, p0/m, z7.d, #8 +; CHECK-NEXT: orr z2.d, z4.d, z2.d +; CHECK-NEXT: orr z4.d, z6.d, z5.d +; CHECK-NEXT: lsr z5.d, z1.d, #56 +; CHECK-NEXT: and z3.d, z3.d, #0xff00 +; CHECK-NEXT: lsl z0.d, z0.d, #40 +; CHECK-NEXT: orr z3.d, z3.d, z5.d +; CHECK-NEXT: lsr z5.d, z1.d, #24 +; CHECK-NEXT: lsr z7.d, z1.d, #8 ; CHECK-NEXT: orr z0.d, z16.d, z0.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov z16.d, z1.d ; CHECK-NEXT: and z5.d, z5.d, #0xff0000 ; CHECK-NEXT: and z7.d, z7.d, #0xff000000 -; CHECK-NEXT: orr z5.d, z7.d, z5.d ; 
CHECK-NEXT: and z6.d, z6.d, #0xff000000 +; CHECK-NEXT: orr z5.d, z7.d, z5.d ; CHECK-NEXT: and z16.d, z16.d, #0xff0000 -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: lsl z7.d, p0/m, z7.d, #56 +; CHECK-NEXT: lsl z7.d, z1.d, #56 ; CHECK-NEXT: and z1.d, z1.d, #0xff00 -; CHECK-NEXT: lsl z6.d, p0/m, z6.d, #8 -; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #24 -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #40 +; CHECK-NEXT: lsl z6.d, z6.d, #8 +; CHECK-NEXT: lsl z16.d, z16.d, #24 +; CHECK-NEXT: lsl z1.d, z1.d, #40 ; CHECK-NEXT: orr z6.d, z16.d, z6.d ; CHECK-NEXT: orr z1.d, z7.d, z1.d -; CHECK-NEXT: orr z4.d, z5.d, z4.d +; CHECK-NEXT: orr z3.d, z5.d, z3.d ; CHECK-NEXT: orr z1.d, z1.d, z6.d -; CHECK-NEXT: orr z0.d, z0.d, z3.d -; CHECK-NEXT: orr z1.d, z1.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll index 173e3b4..35b2f4b 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll @@ -8,8 +8,8 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 -; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsl z0.h, z0.h, #8 +; CHECK-NEXT: asr z0.h, z0.h, #8 ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -61,8 +61,8 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: asr z0.s, z0.s, #16 ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve2-sra.ll b/llvm/test/CodeGen/AArch64/sve2-sra.ll index 9751004..afa8dd5 100644 --- a/llvm/test/CodeGen/AArch64/sve2-sra.ll +++ b/llvm/test/CodeGen/AArch64/sve2-sra.ll @@ -108,8 +108,7 @@ define @usra_intr_i64( %a, @usra_intr_u_i8( %pg, %a, %b) #0 { ; CHECK-LABEL: usra_intr_u_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z1.b, p0/m, z1.b, #1 -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: usra z0.b, z1.b, #1 ; CHECK-NEXT: ret %ins = insertelement poison, i8 1, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -121,8 +120,7 @@ define @usra_intr_u_i8( %pg, @usra_intr_u_i16( %pg, %a, %b) #0 { ; CHECK-LABEL: usra_intr_u_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #2 -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: usra z0.h, z1.h, #2 ; CHECK-NEXT: ret %ins = insertelement poison, i16 2, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -134,8 +132,7 @@ define @usra_intr_u_i16( %pg, @usra_intr_u_i32( %pg, %a, %b) #0 { ; CHECK-LABEL: usra_intr_u_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #3 -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: usra z0.s, z1.s, #3 ; CHECK-NEXT: ret %ins = insertelement poison, i32 3, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -147,8 +144,7 @@ define @usra_intr_u_i32( %pg, @usra_intr_u_i64( %pg, %a, %b) #0 { ; CHECK-LABEL: usra_intr_u_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #4 -; 
CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: usra z0.d, z1.d, #4 ; CHECK-NEXT: ret %ins = insertelement poison, i64 4, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -262,8 +258,7 @@ define @ssra_intr_i64( %a, @ssra_intr_u_i8( %pg, %a, %b) #0 { ; CHECK-LABEL: ssra_intr_u_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z1.b, p0/m, z1.b, #1 -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ssra z0.b, z1.b, #1 ; CHECK-NEXT: ret %ins = insertelement poison, i8 1, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -275,8 +270,7 @@ define @ssra_intr_u_i8( %pg, @ssra_intr_u_i16( %pg, %a, %b) #0 { ; CHECK-LABEL: ssra_intr_u_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z1.h, p0/m, z1.h, #2 -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ssra z0.h, z1.h, #2 ; CHECK-NEXT: ret %ins = insertelement poison, i16 2, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -288,8 +282,7 @@ define @ssra_intr_u_i16( %pg, @ssra_intr_u_i32( %pg, %a, %b) #0 { ; CHECK-LABEL: ssra_intr_u_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z1.s, p0/m, z1.s, #3 -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ssra z0.s, z1.s, #3 ; CHECK-NEXT: ret %ins = insertelement poison, i32 3, i32 0 %splat = shufflevector %ins, poison, zeroinitializer @@ -301,8 +294,7 @@ define @ssra_intr_u_i32( %pg, @ssra_intr_u_i64( %pg, %a, %b) #0 { ; CHECK-LABEL: ssra_intr_u_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: asr z1.d, p0/m, z1.d, #4 -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ssra z0.d, z1.d, #4 ; CHECK-NEXT: ret %ins = insertelement poison, i64 4, i32 0 %splat = shufflevector %ins, poison, zeroinitializer