// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
- ValueType SrcTy, ValueType ZeroTy,
- SubRegIndex SubIdx> {
+ ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector immAllZerosV,
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
}
let Predicates = [HasAVX, NoVLX] in {
- defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
+ defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
- defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, sub_ymm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, sub_ymm>;
+ defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, sub_ymm>;
}
let Predicates = [HasFP16, HasVLX] in {
- defm : subvec_zero_lowering<"APSZ128", VR128X, v16f16, v8f16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v32f16, v8f16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16f16, v8f16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v32f16, v8f16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :