let TargetPrefix = "aarch64" in {
class SME_Load_Store_Intrinsic<LLVMType pred_ty>
: DefaultAttrsIntrinsic<[],
- [pred_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>;
+ [pred_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
// Loads
def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
class SME_TileToVector_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i64_ty, llvm_i32_ty]>;
+ [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
class SME_VectorToTile_Intrinsic
: DefaultAttrsIntrinsic<[],
- [llvm_i64_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyvector_ty]>;
+ [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic;
def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic;
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
- def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>;
+ def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
class SME_OuterProduct_Intrinsic
: DefaultAttrsIntrinsic<[],
- [llvm_i64_ty,
+ [llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
- llvm_anyvector_ty]>;
+ llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
class SME_AddVectorToTile_Intrinsic
: DefaultAttrsIntrinsic<[],
- [llvm_i64_ty,
+ [llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyvector_ty]>;
+ llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
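; Illustrative sketch (not taken from this patch): at the IR level the tile
; index is now a constant i32, enforced by ImmArg, e.g. for the fp32 outer
; product (minimal example; an SME-enabled target is assumed):
define void @mopa_example(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm,
                          <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
  call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
  ret void
}
declare void @llvm.aarch64.sme.mopa.nxv4f32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)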
let ParserMatchClass = Imm0_1Operand;
}
+// timm32_0_1 predicate - True if the 32-bit immediate is in the range [0,1]
+def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
  return ((uint64_t)Imm) < 16;
}]> {
  let ParserMatchClass = Imm0_15Operand;
}
+// timm32_0_3 predicate - True if the 32-bit immediate is in the range [0,3]
+def timm32_0_3 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = Imm0_3Operand;
+}
+
// timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
  return ((uint32_t)Imm) < 8;
}]> {
  let ParserMatchClass = Imm0_7Operand;
}
+// timm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
+def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 16;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+}
+
+// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
+def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
+// timm32_0_255 predicate - True if the 32-bit immediate is in the range [0,255]
+def timm32_0_255 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 256;
+}]> {
+ let ParserMatchClass = Imm0_255Operand;
+}
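// Note: these predicates use TImmLeaf rather than ImmLeaf because immediates
// constrained by ImmArg are lowered to TargetConstant nodes during selection,
// which ImmLeaf-based predicates do not match.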
+
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
// {5-0} - imm6
: VectorIndex<i64, SVEVectorIndexExtDupQOperand,
[{ return ((uint64_t)Imm) < 4; }]>;
-def sme_elm_idx0_0 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) == 0;
+def sme_elm_idx0_0 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) == 0;
}]> {
let ParserMatchClass = Imm0_0Operand;
let PrintMethod = "printMatrixIndex";
let OperandNamespace = "AArch64";
let OperandType = "OPERAND_IMPLICIT_IMM_0";
}
-def sme_elm_idx0_1 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) <= 1;
+def sme_elm_idx0_1 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) <= 1;
}]> {
let ParserMatchClass = Imm0_1Operand;
let PrintMethod = "printMatrixIndex";
}
-def sme_elm_idx0_3 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) <= 3;
+def sme_elm_idx0_3 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) <= 3;
}]> {
let ParserMatchClass = Imm0_3Operand;
let PrintMethod = "printMatrixIndex";
}
-def sme_elm_idx0_7 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) <= 7;
+def sme_elm_idx0_7 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) <= 7;
}]> {
let ParserMatchClass = Imm0_7Operand;
let PrintMethod = "printMatrixIndex";
}
-def sme_elm_idx0_15 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) <= 15;
+def sme_elm_idx0_15 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm) <= 15;
}]> {
let ParserMatchClass = Imm0_15Operand;
let PrintMethod = "printMatrixIndex";
//
//===----------------------------------------------------------------------===//
-def imm_to_tile8 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAB0>", []>;
-def imm_to_tile16 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAH0>", []>;
-def imm_to_tile32 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAS0>", []>;
-def imm_to_tile64 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAD0>", []>;
-def imm_to_tile128 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
+def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
+def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
+def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
+def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
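// The imm_to_tile* complex patterns map a ZA tile-index immediate to the
// corresponding tile register (ZAB0 + imm, ZAH0 + imm, and so on) via the
// ImmToTile selector in the AArch64 instruction selector.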
def tileslice8  : ComplexPattern<i32, 2, "SelectSMETileSlice<4>", []>;
def tileslice16 : ComplexPattern<i32, 2, "SelectSMETileSlice<3>", []>;
//===----------------------------------------------------------------------===//
class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
- : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
+ : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
zpr_ty:$zn, zpr_ty:$zm), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;
- def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
(nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
}
multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;
- def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
(nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
}
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8>;
- def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
+ def : Pat<(op timm32_0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
(nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
}
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
- def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+ def : Pat<(op timm32_0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
(nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
}
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
- def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+ def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
(nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
}
multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
- def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+ def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
(nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
}
//===----------------------------------------------------------------------===//
class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
: Pseudo<(outs),
- (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
+ (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
let usesCustomInserter = 1;
def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
def : Pat<(int_aarch64_sme_addha
- imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
(nxv4i32 ZPR32:$zn)),
- (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+ (ADDHA_MPPZ_PSEUDO_S timm32_0_3:$tile, $pn, $pm, $zn)>;
def : Pat<(int_aarch64_sme_addva
- imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
(nxv4i32 ZPR32:$zn)),
- (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+ (ADDVA_MPPZ_PSEUDO_S timm32_0_3:$tile, $pn, $pm, $zn)>;
let Predicates = [HasSMEI16I64] in {
def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
def : Pat<(int_aarch64_sme_addha
- imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
(nxv2i64 ZPR64:$zn)),
- (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+ (ADDHA_MPPZ_PSEUDO_D timm32_0_7:$tile, $pn, $pm, $zn)>;
def : Pat<(int_aarch64_sme_addva
- imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
(nxv2i64 ZPR64:$zn)),
- (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+ (ADDVA_MPPZ_PSEUDO_D timm32_0_7:$tile, $pn, $pm, $zn)>;
}
//===----------------------------------------------------------------------===//
}
class sme_load_pseudo
- : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
- i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
+ : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
+ i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
let usesCustomInserter = 1;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
!if(is_col, int_aarch64_sme_ld1b_vert,
int_aarch64_sme_ld1b_horiz),
- sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0,
+ sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
tileslice8>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
!if(is_col, int_aarch64_sme_ld1h_vert,
int_aarch64_sme_ld1h_horiz),
- imm0_1, imm0_7, am_sve_regreg_lsl1,
+ timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
tileslice16>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
!if(is_col, int_aarch64_sme_ld1w_vert,
int_aarch64_sme_ld1w_horiz),
- imm0_3, imm0_3, am_sve_regreg_lsl2,
+ timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
tileslice32>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
!if(is_col, int_aarch64_sme_ld1d_vert,
int_aarch64_sme_ld1d_horiz),
- imm0_7, imm0_1, am_sve_regreg_lsl3,
+ timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
tileslice64>;
defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
!if(is_col, int_aarch64_sme_ld1q_vert,
int_aarch64_sme_ld1q_horiz),
- imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
+ timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
tileslice128>;
}
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
!if(is_col, int_aarch64_sme_st1b_vert,
int_aarch64_sme_st1b_horiz),
- imm0_15, imm_to_tile8, am_sve_regreg_lsl0,
+ timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
tileslice8>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
!if(is_col, int_aarch64_sme_st1h_vert,
int_aarch64_sme_st1h_horiz),
- imm0_7, imm_to_tile16, am_sve_regreg_lsl1,
+ timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
tileslice16>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
!if(is_col, int_aarch64_sme_st1w_vert,
int_aarch64_sme_st1w_horiz),
- imm0_3, imm_to_tile32, am_sve_regreg_lsl2,
+ timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
tileslice32>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
!if(is_col, int_aarch64_sme_st1d_vert,
int_aarch64_sme_st1d_horiz),
- imm0_1, imm_to_tile64, am_sve_regreg_lsl3,
+ timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
tileslice64>;
defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
!if(is_col, int_aarch64_sme_st1q_vert,
}
class sme_mova_insert_pseudo
- : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
- i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
+ : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
+ i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
let usesCustomInserter = 1;
  defvar op = !if(is_col, int_aarch64_sme_write_vert,
                          int_aarch64_sme_write_horiz);
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
- nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15,
+ nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15,
op, tileslice8>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
- nxv8i16, nxv8i1, sme_elm_idx0_1, imm0_7,
+ nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
op, tileslice16>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
- nxv8f16, nxv8i1, sme_elm_idx0_1, imm0_7,
+ nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
op, tileslice16>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
- nxv8bf16, nxv8i1, sme_elm_idx0_1, imm0_7,
+ nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
op, tileslice16>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
- nxv4i32, nxv4i1, sme_elm_idx0_3, imm0_3,
+ nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
op, tileslice32>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
- nxv4f32, nxv4i1, sme_elm_idx0_3, imm0_3,
+ nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
op, tileslice32>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
- nxv2i64, nxv2i1, sme_elm_idx0_7, imm0_1,
+ nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
op, tileslice64>;
defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
- nxv2f64, nxv2i1, sme_elm_idx0_7, imm0_1,
+ nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
op, tileslice64>;
  defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
                           int_aarch64_sme_writeq_horiz);
  defvar op = !if(is_col, int_aarch64_sme_read_vert,
                          int_aarch64_sme_read_horiz);
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
- nxv16i8, nxv16i1, imm0_15,
+ nxv16i8, nxv16i1, sme_elm_idx0_15,
imm_to_tile8, tileslice8, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
- nxv8i16, nxv8i1, imm0_7,
+ nxv8i16, nxv8i1, sme_elm_idx0_7,
imm_to_tile16, tileslice16, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
- nxv8f16, nxv8i1, imm0_7,
+ nxv8f16, nxv8i1, sme_elm_idx0_7,
imm_to_tile16, tileslice16, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
- nxv8bf16, nxv8i1, imm0_7,
+ nxv8bf16, nxv8i1, sme_elm_idx0_7,
imm_to_tile16, tileslice16, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
- nxv4i32, nxv4i1, imm0_3,
+ nxv4i32, nxv4i1, sme_elm_idx0_3,
imm_to_tile32, tileslice32, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
- nxv4f32, nxv4i1, imm0_3,
+ nxv4f32, nxv4i1, sme_elm_idx0_3,
imm_to_tile32, tileslice32, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
- nxv2i64, nxv2i1, imm0_1,
+ nxv2i64, nxv2i1, sme_elm_idx0_1,
imm_to_tile64, tileslice64, op>;
defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
- nxv2f64, nxv2i1, imm0_1,
+ nxv2f64, nxv2i1, sme_elm_idx0_1,
imm_to_tile64, tileslice64, op>;
  defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
                           int_aarch64_sme_readq_horiz);
def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
- def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>,
+ def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
Sched<[]> {
// Translated to the actual instructions in AArch64ISelLowering.cpp
let usesCustomInserter = 1;
}
- def : Pat<(int_aarch64_sme_zero imm:$imm),
- (!cast<Instruction>(NAME # _PSEUDO) imm:$imm)>;
+ def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
}
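; Illustrative sketch (not taken from this patch's tests): the ZA zeroing mask
; is likewise an i32 immediate now; bit i of the mask selects tile ZAi.D, so
; the InstAliases above (e.g. 0b11011101 for {za0.s, za2.s, za3.s}) are just
; named groupings of that mask.
define void @zero_za0d_za1d() {
  call void @llvm.aarch64.sme.zero(i32 3)   ; zeroes tiles za0.d and za1.d
  ret void
}
declare void @llvm.aarch64.sme.zero(i32)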
//===----------------------------------------------------------------------===//
; CHECK: // %bb.0:
; CHECK-NEXT: addha za0.s, p0/m, p1/m, z0.s
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+ call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: addva za3.s, p0/m, p1/m, z0.s
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+ call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: addha za0.d, p0/m, p1/m, z0.d
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+ call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: addva za7.d, p0/m, p1/m, z0.d
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+ call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
ret void
}
-declare void @llvm.aarch64.sme.addha.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.addha.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.addva.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.addva.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addha.nxv4i32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addha.nxv2i64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addva.nxv4i32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addva.nxv2i64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
; CHECK-NEXT: ld1b {za0v.b[w13, 0]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 0)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i8, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
ret void;
}
; CHECK-NEXT: ld1h {za1v.h[w12, 7]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 %tileslice)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i16, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %base, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %base, i32 1, i32 0)
ret void;
}
; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 0)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i32, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %base, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %base, i32 3, i32 %tileslice)
ret void;
}
; CHECK-NEXT: ld1d {za7v.d[w12, 1]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 %tileslice)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i64, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %base, i64 7, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %base, i32 7, i32 0)
ret void;
}
; CHECK-NEXT: ld1q {za14v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
ret void;
}
; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%base = getelementptr i128, ptr %ptr, i64 %index
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %base, i64 15, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %base, i32 15, i32 0)
ret void;
}
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %base)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add1)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add2)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %base)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add1)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add2)
%inc = add nuw nsw i32 %i, 1
%exitcond.not = icmp eq i32 %inc, %N
br i1 %exitcond.not, label %exit, label %for.body
}
-declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ldr(i32, ptr)
declare i64 @llvm.vscale.i64()
; CHECK: // %bb.0:
; CHECK-NEXT: bfmopa za0.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+ call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+ call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: smopa za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: smopa za0.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: umopa za3.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: umopa za1.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+ call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: fmopa za2.d, p0/m, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mopa.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+ call void @llvm.aarch64.sme.mopa.nxv2f64(i32 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: sumopa za1.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: sumopa za3.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: usmopa za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: usmopa za7.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
attributes #0 = { "target-features"="+sme-i16i64" }
attributes #1 = { "target-features"="+sme-f64f64" }
-declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.mopa.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.mopa.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.mopa.nxv4f32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.mopa.nxv2f64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
; CHECK: // %bb.0:
; CHECK-NEXT: bfmops za0.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+ call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: fmops za1.s, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+ call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: smops za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: smops za0.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: umops za3.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: umops za1.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: fmops za0.s, p0/m, p1/m, z0.s, z1.s
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mops.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+ call void @llvm.aarch64.sme.mops.nxv4f32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: fmops za2.d, p0/m, p1/m, z0.d, z1.d
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.mops.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+ call void @llvm.aarch64.sme.mops.nxv2f64(i32 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: sumops za1.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: sumops za3.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: usmops za2.s, p0/m, p1/m, z0.b, z1.b
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret void
}
; CHECK: // %bb.0:
; CHECK-NEXT: usmops za7.d, p0/m, p1/m, z0.h, z1.h
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+ call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret void
}
attributes #0 = { "target-features"="+sme-i16i64" }
attributes #1 = { "target-features"="+sme-f64f64" }
-declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.mops.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.mops.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.mops.nxv4f32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.mops.nxv2f64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
; CHECK-NEXT: mov z0.b, p0/m, za0h.b[w12, 14]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.2 = add i32 %tileslice, 2
- %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.2)
+ %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.2)
%tileslice.4 = add i32 %tileslice, 4
- %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.4)
+ %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.4)
%tileslice.6 = add i32 %tileslice, 6
- %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.6)
+ %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.6)
%tileslice.8 = add i32 %tileslice, 8
- %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.8)
+ %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.8)
%tileslice.10 = add i32 %tileslice, 10
- %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.10)
+ %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.10)
%tileslice.12 = add i32 %tileslice, 12
- %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.12)
+ %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.12)
%tileslice.14 = add i32 %tileslice, 14
- %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.14)
+ %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.14)
ret <vscale x 16 x i8> %z0
}
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%tileslice.1 = add i32 %tileslice, 1
- %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.1)
+ %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.1)
%tileslice.3 = add i32 %tileslice, 3
- %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.3)
+ %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.3)
%tileslice.5 = add i32 %tileslice, 5
- %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.5)
+ %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.5)
%tileslice.7 = add i32 %tileslice, 7
- %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.7)
+ %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.7)
%tileslice.9 = add i32 %tileslice, 9
- %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.9)
+ %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.9)
%tileslice.11 = add i32 %tileslice, 11
- %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.11)
+ %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.11)
%tileslice.13 = add i32 %tileslice, 13
- %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.13)
+ %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.13)
%tileslice.15 = add i32 %tileslice, 15
- %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.15)
+ %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.15)
ret <vscale x 16 x i8> %z0
}
; CHECK-NEXT: mov z0.h, p0/m, za0h.h[w12, 6]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.2 = add i32 %tileslice, 2
- %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.2)
+ %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
%tileslice.4 = add i32 %tileslice, 4
- %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.4)
+ %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.4)
%tileslice.6 = add i32 %tileslice, 6
- %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.6)
+ %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
ret <vscale x 8 x i16> %z0
}
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%tileslice.1 = add i32 %tileslice, 1
- %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.1)
+ %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.1)
%tileslice.3 = add i32 %tileslice, 3
- %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.3)
+ %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.3)
%tileslice.5 = add i32 %tileslice, 5
- %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.5)
+ %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.5)
%tileslice.7 = add i32 %tileslice, 7
- %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.7)
+ %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.7)
ret <vscale x 8 x i16> %z0
}
; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.1 = add i32 %tileslice, 1
- %z1 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.1)
+ %z1 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.1)
%tileslice.2 = add i32 %tileslice, 2
- %z2 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.2)
+ %z2 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
%tileslice.3 = add i32 %tileslice, 3
- %z3 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.3)
+ %z3 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.3)
%tileslice.4 = add i32 %tileslice, 4
- %z4 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.4)
+ %z4 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.4)
%tileslice.5 = add i32 %tileslice, 5
- %z5 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.5)
+ %z5 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.5)
%tileslice.6 = add i32 %tileslice, 6
- %z6 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.6)
+ %z6 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
%tileslice.7 = add i32 %tileslice, 7
- %z7 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.7)
+ %z7 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.7)
ret <vscale x 8 x half> %z0
}
; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.1 = add i32 %tileslice, 1
- %z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.1)
+ %z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.1)
%tileslice.2 = add i32 %tileslice, 2
- %z2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.2)
+ %z2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
%tileslice.3 = add i32 %tileslice, 3
- %z3 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.3)
+ %z3 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.3)
%tileslice.4 = add i32 %tileslice, 4
- %z4 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.4)
+ %z4 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.4)
%tileslice.5 = add i32 %tileslice, 5
- %z5 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.5)
+ %z5 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.5)
%tileslice.6 = add i32 %tileslice, 6
- %z6 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.6)
+ %z6 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
%tileslice.7 = add i32 %tileslice, 7
- %z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.7)
+ %z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.7)
ret <vscale x 8 x bfloat> %z0
}
; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 2]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.2 = add i32 %tileslice, 2
- %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.2)
+ %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.2)
ret <vscale x 4 x i32> %z0
}
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%tileslice.1 = add i32 %tileslice, 1
- %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 3, i32 %tileslice.1)
+ %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 3, i32 %tileslice.1)
%tileslice.3 = add i32 %tileslice, 3
- %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 3, i32 %tileslice.3)
+ %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 3, i32 %tileslice.3)
ret <vscale x 4 x i32> %z0
}
; CHECK-NEXT: mov z0.s, p0/m, za0v.s[w12, 3]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.1 = add i32 %tileslice, 1
- %z1 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.1)
+ %z1 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.1)
%tileslice.2 = add i32 %tileslice, 2
- %z2 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.2)
+ %z2 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.2)
%tileslice.3 = add i32 %tileslice, 3
- %z3 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.3)
+ %z3 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.3)
ret <vscale x 4 x float> %z0
}
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: mov z0.d, p0/m, za0h.d[w12, 0]
; CHECK-NEXT: ret
- %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i32 0, i32 %tileslice)
ret <vscale x 2 x i64> %z0
}
; CHECK-NEXT: mov z0.d, p0/m, za1v.d[w12, 1]
; CHECK-NEXT: ret
%tileslice.1 = add i32 %tileslice, 1
- %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 1, i32 %tileslice.1)
+ %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i32 1, i32 %tileslice.1)
ret <vscale x 2 x i64> %z0
}
; CHECK-NEXT: mov z0.d, p0/m, za0v.d[w12, 1]
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
- %z0 = call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i64 0, i32 %tileslice)
+ %z0 = call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 0, i32 %tileslice)
%tileslice.1 = add i32 %tileslice, 1
- %z1 = call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i64 0, i32 %tileslice.1)
+ %z1 = call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 0, i32 %tileslice.1)
ret <vscale x 2 x double> %z0
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 0)
ret <vscale x 16 x i8> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 0)
ret <vscale x 8 x i16> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 0)
ret <vscale x 8 x half> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 0)
ret <vscale x 4 x i32> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 0)
ret <vscale x 4 x float> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i32 0, i32 0)
ret <vscale x 2 x i64> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i64 0, i32 0)
+ %res = call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 0, i32 0)
ret <vscale x 2 x double> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 15, i32 0)
ret <vscale x 16 x i8> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 15, i32 0)
ret <vscale x 8 x i16> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 15, i32 0)
ret <vscale x 8 x half> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 15, i32 0)
ret <vscale x 4 x i32> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 15, i32 0)
ret <vscale x 4 x float> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i32 15, i32 0)
ret <vscale x 2 x i64> %res
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0]
; CHECK-NEXT: ret
- %res = call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i64 15, i32 0)
+ %res = call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 15, i32 0)
ret <vscale x 2 x double> %res
}
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i64 0, i32 %base)
- %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i64 0, i32 %add1)
- %z2 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i64 0, i32 %add2)
+ %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i32 0, i32 %base)
+ %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i32 0, i32 %add1)
+ %z2 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i32 0, i32 %add2)
%inc = add nuw nsw i32 %i, 3
%exitcond.not = icmp eq i32 %inc, %N
br i1 %exitcond.not, label %exit, label %for.body
ret <vscale x 4 x i32> %res
}
-declare <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i64, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i64, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i64, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
-declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i64, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i64, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i64, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
<vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3,
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5,
<vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.2, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.2, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.4, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.4, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.6, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.6, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
%tileslice.8 = add i32 %tileslice, 8
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.8, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.8, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
%tileslice.10 = add i32 %tileslice, 10
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.10, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.10, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
%tileslice.12 = add i32 %tileslice, 12
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.12, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.12, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
%tileslice.14 = add i32 %tileslice, 14
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.14, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.14, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
ret void
}
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5,
<vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.3, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.3, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.5, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.5, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.7, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.7, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
%tileslice.9 = add i32 %tileslice, 9
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.9, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.9, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
%tileslice.11 = add i32 %tileslice, 11
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.11, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.11, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
%tileslice.13 = add i32 %tileslice, 13
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.13, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.13, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
%tileslice.15 = add i32 %tileslice, 15
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.15, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.15, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
ret void
}
<vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3,
<vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5,
<vscale x 8 x i16> %z6, <vscale x 8 x i16> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z0)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z2)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z2)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z4)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z6)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z6)
ret void
}
<vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5,
<vscale x 8 x i16> %z6, <vscale x 8 x i16> %z7) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z1)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z3)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z5)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z5)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z7)
ret void
}
<vscale x 8 x half> %z2, <vscale x 8 x half> %z3,
<vscale x 8 x half> %z4, <vscale x 8 x half> %z5,
<vscale x 8 x half> %z6, <vscale x 8 x half> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z1)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z2)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z3)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z4)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z5)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z5)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z6)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z6)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z7)
ret void
}
<vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3,
<vscale x 8 x bfloat> %z4, <vscale x 8 x bfloat> %z5,
<vscale x 8 x bfloat> %z6, <vscale x 8 x bfloat> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z1)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z2)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z3)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z4)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z5)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z5)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z6)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z6)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z7)
ret void
}
; CHECK-NEXT: ret
<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) {
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z0)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z2)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z2)
ret void
}
<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z1)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z3)
ret void
}
; CHECK-NEXT: ret
<vscale x 4 x float> %z0, <vscale x 4 x float> %z1,
<vscale x 4 x float> %z2, <vscale x 4 x float> %z3) {
- call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z1)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z2)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z3)
ret void
}
; CHECK-NEXT: mov za0h.d[w12, 0], p0/m, z0.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) {
- call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z0)
ret void
}
; CHECK-NEXT: ret
<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z1)
ret void
}
; CHECK-NEXT: mov za0v.d[w12, 1], p0/m, z1.d
; CHECK-NEXT: ret
<vscale x 2 x double> %z0, <vscale x 2 x double> %z1) {
- call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 0, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z1)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
ret void
}
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
ret void
}
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %base, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %add1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %add2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %base, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %add1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %add2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
%inc = add nuw nsw i32 %i, 3
%exitcond.not = icmp eq i32 %inc, %N
br i1 %exitcond.not, label %exit, label %for.body
ret void
}
-declare void @llvm.aarch64.sme.write.horiz.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.write.horiz.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.write.horiz.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.write.horiz.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.write.horiz.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.write.horiz.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.write.horiz.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.write.vert.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.write.vert.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.write.vert.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.write.vert.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.write.vert.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.write.vert.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.write.vert.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.write.vert.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-
-declare void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.write.horiz.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.write.horiz.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.write.horiz.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.write.horiz.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.write.horiz.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.write.horiz.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.write.horiz.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.write.vert.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.write.vert.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.write.vert.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.write.vert.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.write.vert.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.write.vert.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.write.vert.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.write.vert.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
; CHECK-NEXT: st1b {za0v.b[w13, 0]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 0)
  ret void
}
; CHECK-NEXT: ret
%base = getelementptr i8, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
ret void;
}
; CHECK-NEXT: st1h {za1v.h[w12, 7]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 %tileslice)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i16, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %base, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %base, i32 1, i32 0)
ret void;
}
; CHECK-NEXT: st1w {za3v.s[w13, 0]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 %tileslice)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 %tileslice)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 0)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i32, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %base, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %base, i32 3, i32 %tileslice)
ret void;
}
; CHECK-NEXT: st1d {za7v.d[w12, 1]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 %tileslice)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 %tileslice)
ret void;
}
; CHECK-NEXT: ret
%base = getelementptr i64, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %base, i64 7, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %base, i32 7, i32 0)
ret void;
}
; CHECK-NEXT: st1q {za14v.q[w12, 0]}, p0, [x0]
; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
ret void;
}
; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%base = getelementptr i128, ptr %ptr, i64 %index
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %base, i64 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %base, i32 15, i32 0)
ret void;
}
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %base)
- tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add0)
- tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add1)
+ tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %base)
+ tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add0)
+ tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add1)
%inc = add nuw nsw i32 %i, 1
%exitcond.not = icmp eq i32 %inc, %N
br i1 %exitcond.not, label %exit, label %for.body
ret void
}
-declare void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
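+; Operand order for the st1 store intrinsics above (a descriptive note, mirroring the
+; declarations): governing predicate, base pointer, i32 tile index (a constant in these
+; tests), then the i32 tile-slice index, which may be a variable such as %tileslice.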
declare void @llvm.aarch64.sme.str(i32, ptr)
declare i64 @llvm.vscale.i64()
; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d}
; CHECK-NEXT: zero {za}
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.zero(i64 0)
- call void @llvm.aarch64.sme.zero(i64 1)
- call void @llvm.aarch64.sme.zero(i64 2)
- call void @llvm.aarch64.sme.zero(i64 3)
- call void @llvm.aarch64.sme.zero(i64 4)
- call void @llvm.aarch64.sme.zero(i64 5)
- call void @llvm.aarch64.sme.zero(i64 6)
- call void @llvm.aarch64.sme.zero(i64 7)
- call void @llvm.aarch64.sme.zero(i64 8)
- call void @llvm.aarch64.sme.zero(i64 9)
- call void @llvm.aarch64.sme.zero(i64 10)
- call void @llvm.aarch64.sme.zero(i64 11)
- call void @llvm.aarch64.sme.zero(i64 12)
- call void @llvm.aarch64.sme.zero(i64 13)
- call void @llvm.aarch64.sme.zero(i64 14)
- call void @llvm.aarch64.sme.zero(i64 15)
- call void @llvm.aarch64.sme.zero(i64 16)
- call void @llvm.aarch64.sme.zero(i64 17)
- call void @llvm.aarch64.sme.zero(i64 18)
- call void @llvm.aarch64.sme.zero(i64 19)
- call void @llvm.aarch64.sme.zero(i64 20)
- call void @llvm.aarch64.sme.zero(i64 21)
- call void @llvm.aarch64.sme.zero(i64 22)
- call void @llvm.aarch64.sme.zero(i64 23)
- call void @llvm.aarch64.sme.zero(i64 24)
- call void @llvm.aarch64.sme.zero(i64 25)
- call void @llvm.aarch64.sme.zero(i64 26)
- call void @llvm.aarch64.sme.zero(i64 27)
- call void @llvm.aarch64.sme.zero(i64 28)
- call void @llvm.aarch64.sme.zero(i64 29)
- call void @llvm.aarch64.sme.zero(i64 30)
- call void @llvm.aarch64.sme.zero(i64 31)
- call void @llvm.aarch64.sme.zero(i64 32)
- call void @llvm.aarch64.sme.zero(i64 33)
- call void @llvm.aarch64.sme.zero(i64 34)
- call void @llvm.aarch64.sme.zero(i64 35)
- call void @llvm.aarch64.sme.zero(i64 36)
- call void @llvm.aarch64.sme.zero(i64 37)
- call void @llvm.aarch64.sme.zero(i64 38)
- call void @llvm.aarch64.sme.zero(i64 39)
- call void @llvm.aarch64.sme.zero(i64 40)
- call void @llvm.aarch64.sme.zero(i64 41)
- call void @llvm.aarch64.sme.zero(i64 42)
- call void @llvm.aarch64.sme.zero(i64 43)
- call void @llvm.aarch64.sme.zero(i64 44)
- call void @llvm.aarch64.sme.zero(i64 45)
- call void @llvm.aarch64.sme.zero(i64 46)
- call void @llvm.aarch64.sme.zero(i64 47)
- call void @llvm.aarch64.sme.zero(i64 48)
- call void @llvm.aarch64.sme.zero(i64 49)
- call void @llvm.aarch64.sme.zero(i64 50)
- call void @llvm.aarch64.sme.zero(i64 51)
- call void @llvm.aarch64.sme.zero(i64 52)
- call void @llvm.aarch64.sme.zero(i64 53)
- call void @llvm.aarch64.sme.zero(i64 54)
- call void @llvm.aarch64.sme.zero(i64 55)
- call void @llvm.aarch64.sme.zero(i64 56)
- call void @llvm.aarch64.sme.zero(i64 57)
- call void @llvm.aarch64.sme.zero(i64 58)
- call void @llvm.aarch64.sme.zero(i64 59)
- call void @llvm.aarch64.sme.zero(i64 60)
- call void @llvm.aarch64.sme.zero(i64 61)
- call void @llvm.aarch64.sme.zero(i64 62)
- call void @llvm.aarch64.sme.zero(i64 63)
- call void @llvm.aarch64.sme.zero(i64 64)
- call void @llvm.aarch64.sme.zero(i64 65)
- call void @llvm.aarch64.sme.zero(i64 66)
- call void @llvm.aarch64.sme.zero(i64 67)
- call void @llvm.aarch64.sme.zero(i64 68)
- call void @llvm.aarch64.sme.zero(i64 69)
- call void @llvm.aarch64.sme.zero(i64 70)
- call void @llvm.aarch64.sme.zero(i64 71)
- call void @llvm.aarch64.sme.zero(i64 72)
- call void @llvm.aarch64.sme.zero(i64 73)
- call void @llvm.aarch64.sme.zero(i64 74)
- call void @llvm.aarch64.sme.zero(i64 75)
- call void @llvm.aarch64.sme.zero(i64 76)
- call void @llvm.aarch64.sme.zero(i64 77)
- call void @llvm.aarch64.sme.zero(i64 78)
- call void @llvm.aarch64.sme.zero(i64 79)
- call void @llvm.aarch64.sme.zero(i64 80)
- call void @llvm.aarch64.sme.zero(i64 81)
- call void @llvm.aarch64.sme.zero(i64 82)
- call void @llvm.aarch64.sme.zero(i64 83)
- call void @llvm.aarch64.sme.zero(i64 84)
- call void @llvm.aarch64.sme.zero(i64 85)
- call void @llvm.aarch64.sme.zero(i64 86)
- call void @llvm.aarch64.sme.zero(i64 87)
- call void @llvm.aarch64.sme.zero(i64 88)
- call void @llvm.aarch64.sme.zero(i64 89)
- call void @llvm.aarch64.sme.zero(i64 90)
- call void @llvm.aarch64.sme.zero(i64 91)
- call void @llvm.aarch64.sme.zero(i64 92)
- call void @llvm.aarch64.sme.zero(i64 93)
- call void @llvm.aarch64.sme.zero(i64 94)
- call void @llvm.aarch64.sme.zero(i64 95)
- call void @llvm.aarch64.sme.zero(i64 96)
- call void @llvm.aarch64.sme.zero(i64 97)
- call void @llvm.aarch64.sme.zero(i64 98)
- call void @llvm.aarch64.sme.zero(i64 99)
- call void @llvm.aarch64.sme.zero(i64 100)
- call void @llvm.aarch64.sme.zero(i64 101)
- call void @llvm.aarch64.sme.zero(i64 102)
- call void @llvm.aarch64.sme.zero(i64 103)
- call void @llvm.aarch64.sme.zero(i64 104)
- call void @llvm.aarch64.sme.zero(i64 105)
- call void @llvm.aarch64.sme.zero(i64 106)
- call void @llvm.aarch64.sme.zero(i64 107)
- call void @llvm.aarch64.sme.zero(i64 108)
- call void @llvm.aarch64.sme.zero(i64 109)
- call void @llvm.aarch64.sme.zero(i64 110)
- call void @llvm.aarch64.sme.zero(i64 111)
- call void @llvm.aarch64.sme.zero(i64 112)
- call void @llvm.aarch64.sme.zero(i64 113)
- call void @llvm.aarch64.sme.zero(i64 114)
- call void @llvm.aarch64.sme.zero(i64 115)
- call void @llvm.aarch64.sme.zero(i64 116)
- call void @llvm.aarch64.sme.zero(i64 117)
- call void @llvm.aarch64.sme.zero(i64 118)
- call void @llvm.aarch64.sme.zero(i64 119)
- call void @llvm.aarch64.sme.zero(i64 120)
- call void @llvm.aarch64.sme.zero(i64 121)
- call void @llvm.aarch64.sme.zero(i64 122)
- call void @llvm.aarch64.sme.zero(i64 123)
- call void @llvm.aarch64.sme.zero(i64 124)
- call void @llvm.aarch64.sme.zero(i64 125)
- call void @llvm.aarch64.sme.zero(i64 126)
- call void @llvm.aarch64.sme.zero(i64 127)
- call void @llvm.aarch64.sme.zero(i64 128)
- call void @llvm.aarch64.sme.zero(i64 129)
- call void @llvm.aarch64.sme.zero(i64 130)
- call void @llvm.aarch64.sme.zero(i64 131)
- call void @llvm.aarch64.sme.zero(i64 132)
- call void @llvm.aarch64.sme.zero(i64 133)
- call void @llvm.aarch64.sme.zero(i64 134)
- call void @llvm.aarch64.sme.zero(i64 135)
- call void @llvm.aarch64.sme.zero(i64 136)
- call void @llvm.aarch64.sme.zero(i64 137)
- call void @llvm.aarch64.sme.zero(i64 138)
- call void @llvm.aarch64.sme.zero(i64 139)
- call void @llvm.aarch64.sme.zero(i64 140)
- call void @llvm.aarch64.sme.zero(i64 141)
- call void @llvm.aarch64.sme.zero(i64 142)
- call void @llvm.aarch64.sme.zero(i64 143)
- call void @llvm.aarch64.sme.zero(i64 144)
- call void @llvm.aarch64.sme.zero(i64 145)
- call void @llvm.aarch64.sme.zero(i64 146)
- call void @llvm.aarch64.sme.zero(i64 147)
- call void @llvm.aarch64.sme.zero(i64 148)
- call void @llvm.aarch64.sme.zero(i64 149)
- call void @llvm.aarch64.sme.zero(i64 150)
- call void @llvm.aarch64.sme.zero(i64 151)
- call void @llvm.aarch64.sme.zero(i64 152)
- call void @llvm.aarch64.sme.zero(i64 153)
- call void @llvm.aarch64.sme.zero(i64 154)
- call void @llvm.aarch64.sme.zero(i64 155)
- call void @llvm.aarch64.sme.zero(i64 156)
- call void @llvm.aarch64.sme.zero(i64 157)
- call void @llvm.aarch64.sme.zero(i64 158)
- call void @llvm.aarch64.sme.zero(i64 159)
- call void @llvm.aarch64.sme.zero(i64 160)
- call void @llvm.aarch64.sme.zero(i64 161)
- call void @llvm.aarch64.sme.zero(i64 162)
- call void @llvm.aarch64.sme.zero(i64 163)
- call void @llvm.aarch64.sme.zero(i64 164)
- call void @llvm.aarch64.sme.zero(i64 165)
- call void @llvm.aarch64.sme.zero(i64 166)
- call void @llvm.aarch64.sme.zero(i64 167)
- call void @llvm.aarch64.sme.zero(i64 168)
- call void @llvm.aarch64.sme.zero(i64 169)
- call void @llvm.aarch64.sme.zero(i64 170)
- call void @llvm.aarch64.sme.zero(i64 171)
- call void @llvm.aarch64.sme.zero(i64 172)
- call void @llvm.aarch64.sme.zero(i64 173)
- call void @llvm.aarch64.sme.zero(i64 174)
- call void @llvm.aarch64.sme.zero(i64 175)
- call void @llvm.aarch64.sme.zero(i64 176)
- call void @llvm.aarch64.sme.zero(i64 177)
- call void @llvm.aarch64.sme.zero(i64 178)
- call void @llvm.aarch64.sme.zero(i64 179)
- call void @llvm.aarch64.sme.zero(i64 180)
- call void @llvm.aarch64.sme.zero(i64 181)
- call void @llvm.aarch64.sme.zero(i64 182)
- call void @llvm.aarch64.sme.zero(i64 183)
- call void @llvm.aarch64.sme.zero(i64 184)
- call void @llvm.aarch64.sme.zero(i64 185)
- call void @llvm.aarch64.sme.zero(i64 186)
- call void @llvm.aarch64.sme.zero(i64 187)
- call void @llvm.aarch64.sme.zero(i64 188)
- call void @llvm.aarch64.sme.zero(i64 189)
- call void @llvm.aarch64.sme.zero(i64 190)
- call void @llvm.aarch64.sme.zero(i64 191)
- call void @llvm.aarch64.sme.zero(i64 192)
- call void @llvm.aarch64.sme.zero(i64 193)
- call void @llvm.aarch64.sme.zero(i64 194)
- call void @llvm.aarch64.sme.zero(i64 195)
- call void @llvm.aarch64.sme.zero(i64 196)
- call void @llvm.aarch64.sme.zero(i64 197)
- call void @llvm.aarch64.sme.zero(i64 198)
- call void @llvm.aarch64.sme.zero(i64 199)
- call void @llvm.aarch64.sme.zero(i64 200)
- call void @llvm.aarch64.sme.zero(i64 201)
- call void @llvm.aarch64.sme.zero(i64 202)
- call void @llvm.aarch64.sme.zero(i64 203)
- call void @llvm.aarch64.sme.zero(i64 204)
- call void @llvm.aarch64.sme.zero(i64 205)
- call void @llvm.aarch64.sme.zero(i64 206)
- call void @llvm.aarch64.sme.zero(i64 207)
- call void @llvm.aarch64.sme.zero(i64 208)
- call void @llvm.aarch64.sme.zero(i64 209)
- call void @llvm.aarch64.sme.zero(i64 210)
- call void @llvm.aarch64.sme.zero(i64 211)
- call void @llvm.aarch64.sme.zero(i64 212)
- call void @llvm.aarch64.sme.zero(i64 213)
- call void @llvm.aarch64.sme.zero(i64 214)
- call void @llvm.aarch64.sme.zero(i64 215)
- call void @llvm.aarch64.sme.zero(i64 216)
- call void @llvm.aarch64.sme.zero(i64 217)
- call void @llvm.aarch64.sme.zero(i64 218)
- call void @llvm.aarch64.sme.zero(i64 219)
- call void @llvm.aarch64.sme.zero(i64 220)
- call void @llvm.aarch64.sme.zero(i64 221)
- call void @llvm.aarch64.sme.zero(i64 222)
- call void @llvm.aarch64.sme.zero(i64 223)
- call void @llvm.aarch64.sme.zero(i64 224)
- call void @llvm.aarch64.sme.zero(i64 225)
- call void @llvm.aarch64.sme.zero(i64 226)
- call void @llvm.aarch64.sme.zero(i64 227)
- call void @llvm.aarch64.sme.zero(i64 228)
- call void @llvm.aarch64.sme.zero(i64 229)
- call void @llvm.aarch64.sme.zero(i64 230)
- call void @llvm.aarch64.sme.zero(i64 231)
- call void @llvm.aarch64.sme.zero(i64 232)
- call void @llvm.aarch64.sme.zero(i64 233)
- call void @llvm.aarch64.sme.zero(i64 234)
- call void @llvm.aarch64.sme.zero(i64 235)
- call void @llvm.aarch64.sme.zero(i64 236)
- call void @llvm.aarch64.sme.zero(i64 237)
- call void @llvm.aarch64.sme.zero(i64 238)
- call void @llvm.aarch64.sme.zero(i64 239)
- call void @llvm.aarch64.sme.zero(i64 240)
- call void @llvm.aarch64.sme.zero(i64 241)
- call void @llvm.aarch64.sme.zero(i64 242)
- call void @llvm.aarch64.sme.zero(i64 243)
- call void @llvm.aarch64.sme.zero(i64 244)
- call void @llvm.aarch64.sme.zero(i64 245)
- call void @llvm.aarch64.sme.zero(i64 246)
- call void @llvm.aarch64.sme.zero(i64 247)
- call void @llvm.aarch64.sme.zero(i64 248)
- call void @llvm.aarch64.sme.zero(i64 249)
- call void @llvm.aarch64.sme.zero(i64 250)
- call void @llvm.aarch64.sme.zero(i64 251)
- call void @llvm.aarch64.sme.zero(i64 252)
- call void @llvm.aarch64.sme.zero(i64 253)
- call void @llvm.aarch64.sme.zero(i64 254)
- call void @llvm.aarch64.sme.zero(i64 255)
+ call void @llvm.aarch64.sme.zero(i32 0)
+ call void @llvm.aarch64.sme.zero(i32 1)
+ call void @llvm.aarch64.sme.zero(i32 2)
+ call void @llvm.aarch64.sme.zero(i32 3)
+ call void @llvm.aarch64.sme.zero(i32 4)
+ call void @llvm.aarch64.sme.zero(i32 5)
+ call void @llvm.aarch64.sme.zero(i32 6)
+ call void @llvm.aarch64.sme.zero(i32 7)
+ call void @llvm.aarch64.sme.zero(i32 8)
+ call void @llvm.aarch64.sme.zero(i32 9)
+ call void @llvm.aarch64.sme.zero(i32 10)
+ call void @llvm.aarch64.sme.zero(i32 11)
+ call void @llvm.aarch64.sme.zero(i32 12)
+ call void @llvm.aarch64.sme.zero(i32 13)
+ call void @llvm.aarch64.sme.zero(i32 14)
+ call void @llvm.aarch64.sme.zero(i32 15)
+ call void @llvm.aarch64.sme.zero(i32 16)
+ call void @llvm.aarch64.sme.zero(i32 17)
+ call void @llvm.aarch64.sme.zero(i32 18)
+ call void @llvm.aarch64.sme.zero(i32 19)
+ call void @llvm.aarch64.sme.zero(i32 20)
+ call void @llvm.aarch64.sme.zero(i32 21)
+ call void @llvm.aarch64.sme.zero(i32 22)
+ call void @llvm.aarch64.sme.zero(i32 23)
+ call void @llvm.aarch64.sme.zero(i32 24)
+ call void @llvm.aarch64.sme.zero(i32 25)
+ call void @llvm.aarch64.sme.zero(i32 26)
+ call void @llvm.aarch64.sme.zero(i32 27)
+ call void @llvm.aarch64.sme.zero(i32 28)
+ call void @llvm.aarch64.sme.zero(i32 29)
+ call void @llvm.aarch64.sme.zero(i32 30)
+ call void @llvm.aarch64.sme.zero(i32 31)
+ call void @llvm.aarch64.sme.zero(i32 32)
+ call void @llvm.aarch64.sme.zero(i32 33)
+ call void @llvm.aarch64.sme.zero(i32 34)
+ call void @llvm.aarch64.sme.zero(i32 35)
+ call void @llvm.aarch64.sme.zero(i32 36)
+ call void @llvm.aarch64.sme.zero(i32 37)
+ call void @llvm.aarch64.sme.zero(i32 38)
+ call void @llvm.aarch64.sme.zero(i32 39)
+ call void @llvm.aarch64.sme.zero(i32 40)
+ call void @llvm.aarch64.sme.zero(i32 41)
+ call void @llvm.aarch64.sme.zero(i32 42)
+ call void @llvm.aarch64.sme.zero(i32 43)
+ call void @llvm.aarch64.sme.zero(i32 44)
+ call void @llvm.aarch64.sme.zero(i32 45)
+ call void @llvm.aarch64.sme.zero(i32 46)
+ call void @llvm.aarch64.sme.zero(i32 47)
+ call void @llvm.aarch64.sme.zero(i32 48)
+ call void @llvm.aarch64.sme.zero(i32 49)
+ call void @llvm.aarch64.sme.zero(i32 50)
+ call void @llvm.aarch64.sme.zero(i32 51)
+ call void @llvm.aarch64.sme.zero(i32 52)
+ call void @llvm.aarch64.sme.zero(i32 53)
+ call void @llvm.aarch64.sme.zero(i32 54)
+ call void @llvm.aarch64.sme.zero(i32 55)
+ call void @llvm.aarch64.sme.zero(i32 56)
+ call void @llvm.aarch64.sme.zero(i32 57)
+ call void @llvm.aarch64.sme.zero(i32 58)
+ call void @llvm.aarch64.sme.zero(i32 59)
+ call void @llvm.aarch64.sme.zero(i32 60)
+ call void @llvm.aarch64.sme.zero(i32 61)
+ call void @llvm.aarch64.sme.zero(i32 62)
+ call void @llvm.aarch64.sme.zero(i32 63)
+ call void @llvm.aarch64.sme.zero(i32 64)
+ call void @llvm.aarch64.sme.zero(i32 65)
+ call void @llvm.aarch64.sme.zero(i32 66)
+ call void @llvm.aarch64.sme.zero(i32 67)
+ call void @llvm.aarch64.sme.zero(i32 68)
+ call void @llvm.aarch64.sme.zero(i32 69)
+ call void @llvm.aarch64.sme.zero(i32 70)
+ call void @llvm.aarch64.sme.zero(i32 71)
+ call void @llvm.aarch64.sme.zero(i32 72)
+ call void @llvm.aarch64.sme.zero(i32 73)
+ call void @llvm.aarch64.sme.zero(i32 74)
+ call void @llvm.aarch64.sme.zero(i32 75)
+ call void @llvm.aarch64.sme.zero(i32 76)
+ call void @llvm.aarch64.sme.zero(i32 77)
+ call void @llvm.aarch64.sme.zero(i32 78)
+ call void @llvm.aarch64.sme.zero(i32 79)
+ call void @llvm.aarch64.sme.zero(i32 80)
+ call void @llvm.aarch64.sme.zero(i32 81)
+ call void @llvm.aarch64.sme.zero(i32 82)
+ call void @llvm.aarch64.sme.zero(i32 83)
+ call void @llvm.aarch64.sme.zero(i32 84)
+ call void @llvm.aarch64.sme.zero(i32 85)
+ call void @llvm.aarch64.sme.zero(i32 86)
+ call void @llvm.aarch64.sme.zero(i32 87)
+ call void @llvm.aarch64.sme.zero(i32 88)
+ call void @llvm.aarch64.sme.zero(i32 89)
+ call void @llvm.aarch64.sme.zero(i32 90)
+ call void @llvm.aarch64.sme.zero(i32 91)
+ call void @llvm.aarch64.sme.zero(i32 92)
+ call void @llvm.aarch64.sme.zero(i32 93)
+ call void @llvm.aarch64.sme.zero(i32 94)
+ call void @llvm.aarch64.sme.zero(i32 95)
+ call void @llvm.aarch64.sme.zero(i32 96)
+ call void @llvm.aarch64.sme.zero(i32 97)
+ call void @llvm.aarch64.sme.zero(i32 98)
+ call void @llvm.aarch64.sme.zero(i32 99)
+ call void @llvm.aarch64.sme.zero(i32 100)
+ call void @llvm.aarch64.sme.zero(i32 101)
+ call void @llvm.aarch64.sme.zero(i32 102)
+ call void @llvm.aarch64.sme.zero(i32 103)
+ call void @llvm.aarch64.sme.zero(i32 104)
+ call void @llvm.aarch64.sme.zero(i32 105)
+ call void @llvm.aarch64.sme.zero(i32 106)
+ call void @llvm.aarch64.sme.zero(i32 107)
+ call void @llvm.aarch64.sme.zero(i32 108)
+ call void @llvm.aarch64.sme.zero(i32 109)
+ call void @llvm.aarch64.sme.zero(i32 110)
+ call void @llvm.aarch64.sme.zero(i32 111)
+ call void @llvm.aarch64.sme.zero(i32 112)
+ call void @llvm.aarch64.sme.zero(i32 113)
+ call void @llvm.aarch64.sme.zero(i32 114)
+ call void @llvm.aarch64.sme.zero(i32 115)
+ call void @llvm.aarch64.sme.zero(i32 116)
+ call void @llvm.aarch64.sme.zero(i32 117)
+ call void @llvm.aarch64.sme.zero(i32 118)
+ call void @llvm.aarch64.sme.zero(i32 119)
+ call void @llvm.aarch64.sme.zero(i32 120)
+ call void @llvm.aarch64.sme.zero(i32 121)
+ call void @llvm.aarch64.sme.zero(i32 122)
+ call void @llvm.aarch64.sme.zero(i32 123)
+ call void @llvm.aarch64.sme.zero(i32 124)
+ call void @llvm.aarch64.sme.zero(i32 125)
+ call void @llvm.aarch64.sme.zero(i32 126)
+ call void @llvm.aarch64.sme.zero(i32 127)
+ call void @llvm.aarch64.sme.zero(i32 128)
+ call void @llvm.aarch64.sme.zero(i32 129)
+ call void @llvm.aarch64.sme.zero(i32 130)
+ call void @llvm.aarch64.sme.zero(i32 131)
+ call void @llvm.aarch64.sme.zero(i32 132)
+ call void @llvm.aarch64.sme.zero(i32 133)
+ call void @llvm.aarch64.sme.zero(i32 134)
+ call void @llvm.aarch64.sme.zero(i32 135)
+ call void @llvm.aarch64.sme.zero(i32 136)
+ call void @llvm.aarch64.sme.zero(i32 137)
+ call void @llvm.aarch64.sme.zero(i32 138)
+ call void @llvm.aarch64.sme.zero(i32 139)
+ call void @llvm.aarch64.sme.zero(i32 140)
+ call void @llvm.aarch64.sme.zero(i32 141)
+ call void @llvm.aarch64.sme.zero(i32 142)
+ call void @llvm.aarch64.sme.zero(i32 143)
+ call void @llvm.aarch64.sme.zero(i32 144)
+ call void @llvm.aarch64.sme.zero(i32 145)
+ call void @llvm.aarch64.sme.zero(i32 146)
+ call void @llvm.aarch64.sme.zero(i32 147)
+ call void @llvm.aarch64.sme.zero(i32 148)
+ call void @llvm.aarch64.sme.zero(i32 149)
+ call void @llvm.aarch64.sme.zero(i32 150)
+ call void @llvm.aarch64.sme.zero(i32 151)
+ call void @llvm.aarch64.sme.zero(i32 152)
+ call void @llvm.aarch64.sme.zero(i32 153)
+ call void @llvm.aarch64.sme.zero(i32 154)
+ call void @llvm.aarch64.sme.zero(i32 155)
+ call void @llvm.aarch64.sme.zero(i32 156)
+ call void @llvm.aarch64.sme.zero(i32 157)
+ call void @llvm.aarch64.sme.zero(i32 158)
+ call void @llvm.aarch64.sme.zero(i32 159)
+ call void @llvm.aarch64.sme.zero(i32 160)
+ call void @llvm.aarch64.sme.zero(i32 161)
+ call void @llvm.aarch64.sme.zero(i32 162)
+ call void @llvm.aarch64.sme.zero(i32 163)
+ call void @llvm.aarch64.sme.zero(i32 164)
+ call void @llvm.aarch64.sme.zero(i32 165)
+ call void @llvm.aarch64.sme.zero(i32 166)
+ call void @llvm.aarch64.sme.zero(i32 167)
+ call void @llvm.aarch64.sme.zero(i32 168)
+ call void @llvm.aarch64.sme.zero(i32 169)
+ call void @llvm.aarch64.sme.zero(i32 170)
+ call void @llvm.aarch64.sme.zero(i32 171)
+ call void @llvm.aarch64.sme.zero(i32 172)
+ call void @llvm.aarch64.sme.zero(i32 173)
+ call void @llvm.aarch64.sme.zero(i32 174)
+ call void @llvm.aarch64.sme.zero(i32 175)
+ call void @llvm.aarch64.sme.zero(i32 176)
+ call void @llvm.aarch64.sme.zero(i32 177)
+ call void @llvm.aarch64.sme.zero(i32 178)
+ call void @llvm.aarch64.sme.zero(i32 179)
+ call void @llvm.aarch64.sme.zero(i32 180)
+ call void @llvm.aarch64.sme.zero(i32 181)
+ call void @llvm.aarch64.sme.zero(i32 182)
+ call void @llvm.aarch64.sme.zero(i32 183)
+ call void @llvm.aarch64.sme.zero(i32 184)
+ call void @llvm.aarch64.sme.zero(i32 185)
+ call void @llvm.aarch64.sme.zero(i32 186)
+ call void @llvm.aarch64.sme.zero(i32 187)
+ call void @llvm.aarch64.sme.zero(i32 188)
+ call void @llvm.aarch64.sme.zero(i32 189)
+ call void @llvm.aarch64.sme.zero(i32 190)
+ call void @llvm.aarch64.sme.zero(i32 191)
+ call void @llvm.aarch64.sme.zero(i32 192)
+ call void @llvm.aarch64.sme.zero(i32 193)
+ call void @llvm.aarch64.sme.zero(i32 194)
+ call void @llvm.aarch64.sme.zero(i32 195)
+ call void @llvm.aarch64.sme.zero(i32 196)
+ call void @llvm.aarch64.sme.zero(i32 197)
+ call void @llvm.aarch64.sme.zero(i32 198)
+ call void @llvm.aarch64.sme.zero(i32 199)
+ call void @llvm.aarch64.sme.zero(i32 200)
+ call void @llvm.aarch64.sme.zero(i32 201)
+ call void @llvm.aarch64.sme.zero(i32 202)
+ call void @llvm.aarch64.sme.zero(i32 203)
+ call void @llvm.aarch64.sme.zero(i32 204)
+ call void @llvm.aarch64.sme.zero(i32 205)
+ call void @llvm.aarch64.sme.zero(i32 206)
+ call void @llvm.aarch64.sme.zero(i32 207)
+ call void @llvm.aarch64.sme.zero(i32 208)
+ call void @llvm.aarch64.sme.zero(i32 209)
+ call void @llvm.aarch64.sme.zero(i32 210)
+ call void @llvm.aarch64.sme.zero(i32 211)
+ call void @llvm.aarch64.sme.zero(i32 212)
+ call void @llvm.aarch64.sme.zero(i32 213)
+ call void @llvm.aarch64.sme.zero(i32 214)
+ call void @llvm.aarch64.sme.zero(i32 215)
+ call void @llvm.aarch64.sme.zero(i32 216)
+ call void @llvm.aarch64.sme.zero(i32 217)
+ call void @llvm.aarch64.sme.zero(i32 218)
+ call void @llvm.aarch64.sme.zero(i32 219)
+ call void @llvm.aarch64.sme.zero(i32 220)
+ call void @llvm.aarch64.sme.zero(i32 221)
+ call void @llvm.aarch64.sme.zero(i32 222)
+ call void @llvm.aarch64.sme.zero(i32 223)
+ call void @llvm.aarch64.sme.zero(i32 224)
+ call void @llvm.aarch64.sme.zero(i32 225)
+ call void @llvm.aarch64.sme.zero(i32 226)
+ call void @llvm.aarch64.sme.zero(i32 227)
+ call void @llvm.aarch64.sme.zero(i32 228)
+ call void @llvm.aarch64.sme.zero(i32 229)
+ call void @llvm.aarch64.sme.zero(i32 230)
+ call void @llvm.aarch64.sme.zero(i32 231)
+ call void @llvm.aarch64.sme.zero(i32 232)
+ call void @llvm.aarch64.sme.zero(i32 233)
+ call void @llvm.aarch64.sme.zero(i32 234)
+ call void @llvm.aarch64.sme.zero(i32 235)
+ call void @llvm.aarch64.sme.zero(i32 236)
+ call void @llvm.aarch64.sme.zero(i32 237)
+ call void @llvm.aarch64.sme.zero(i32 238)
+ call void @llvm.aarch64.sme.zero(i32 239)
+ call void @llvm.aarch64.sme.zero(i32 240)
+ call void @llvm.aarch64.sme.zero(i32 241)
+ call void @llvm.aarch64.sme.zero(i32 242)
+ call void @llvm.aarch64.sme.zero(i32 243)
+ call void @llvm.aarch64.sme.zero(i32 244)
+ call void @llvm.aarch64.sme.zero(i32 245)
+ call void @llvm.aarch64.sme.zero(i32 246)
+ call void @llvm.aarch64.sme.zero(i32 247)
+ call void @llvm.aarch64.sme.zero(i32 248)
+ call void @llvm.aarch64.sme.zero(i32 249)
+ call void @llvm.aarch64.sme.zero(i32 250)
+ call void @llvm.aarch64.sme.zero(i32 251)
+ call void @llvm.aarch64.sme.zero(i32 252)
+ call void @llvm.aarch64.sme.zero(i32 253)
+ call void @llvm.aarch64.sme.zero(i32 254)
+ call void @llvm.aarch64.sme.zero(i32 255)
ret void
}
-declare void @llvm.aarch64.sme.zero(i64)
+declare void @llvm.aarch64.sme.zero(i32)
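+; The i32 operand of @llvm.aarch64.sme.zero is an 8-bit tile mask: bit n selects tile
+; ZAn.D, and a mask of 255 covers every tile, which the checks above print as "zero {za}".
+; The tests sweep all immediates from 0 through 255 to cover each mask encoding.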