// v4f32 X86Movss selected as VBLENDPS.
//  - reg, reg  -> VBLENDPSrri with immediate 1.
//  - reg, load -> VBLENDPSrmi with immediate 1; the loadv4f32 becomes the
//                 instruction's address operand.
//  - load, reg -> VBLENDPSrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 0xe, the 4-bit complement of 1.
// NOTE(review): the commuted form relies on blend-immediate bit i selecting
// lane i from the second source operand -- confirm against the BLENDPS
// instruction reference.
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
(VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
+ (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
+ def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
+ (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
// v4i32 X86Movss selected as VPBLENDW.
//  - reg, reg  -> VPBLENDWrri with immediate 3.
//  - reg, load -> VPBLENDWrmi with immediate 3; the load is matched as a
//                 loadv2i64 wrapped in bc_v4i32.
//  - load, reg -> VPBLENDWrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 0xfc, the 8-bit complement of 3.
// NOTE(review): immediate 3 presumably covers the two 16-bit words that make
// up the low 32-bit element -- confirm against the PBLENDW instruction
// reference.
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VPBLENDWrri VR128:$src1, VR128:$src2, (i8 3))>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (i8 3))>;
+ def : Pat<(v4i32 (X86Movss (bc_v4i32 (loadv2i64 addr:$src2)), VR128:$src1)),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (i8 0xfc))>;
// v2f64 X86Movsd selected as VBLENDPD.
//  - reg, reg  -> VBLENDPDrri with immediate 1.
//  - reg, load -> VBLENDPDrmi with immediate 1; the loadv2f64 becomes the
//                 instruction's address operand.
//  - load, reg -> VBLENDPDrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 2, the 2-bit complement of 1.
// NOTE(review): the commuted form relies on blend-immediate bit i selecting
// lane i from the second source operand -- confirm against the BLENDPD
// instruction reference.
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
(VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
+ (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
+ def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
+ (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
// v2i64 X86Movsd selected as VPBLENDW.
//  - reg, reg  -> VPBLENDWrri with immediate 0xf.
//  - reg, load -> VPBLENDWrmi with immediate 0xf; the loadv2i64 becomes the
//                 instruction's address operand.
//  - load, reg -> VPBLENDWrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 0xf0, the 8-bit complement of 0xf.
// NOTE(review): immediate 0xf presumably covers the four 16-bit words that
// make up the low 64-bit element -- confirm against the PBLENDW instruction
// reference.
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
(VPBLENDWrri VR128:$src1, VR128:$src2, (i8 0xf))>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, (loadv2i64 addr:$src2))),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (i8 0xf))>;
+ def : Pat<(v2i64 (X86Movsd (loadv2i64 addr:$src2), VR128:$src1)),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (i8 0xf0))>;
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
// v4f32 X86Movss selected as BLENDPS.
//  - reg, reg  -> BLENDPSrri with immediate 1.
//  - reg, load -> BLENDPSrmi with immediate 1; the memopv4f32 becomes the
//                 instruction's address operand.
//  - load, reg -> BLENDPSrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 0xe, the 4-bit complement of 1.
// NOTE(review): these use memopv4f32 rather than loadv4f32; presumably the
// memop fragment restricts which loads may be folded (e.g. by alignment) --
// confirm in the fragment's definition.
// NOTE(review): the commuted form relies on blend-immediate bit i selecting
// lane i from the second source operand -- confirm against the BLENDPS
// instruction reference.
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
(BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
+ (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
+ def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
+ (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
// v4i32 X86Movss selected as PBLENDW.
//  - reg, reg  -> PBLENDWrri with immediate 3.
//  - reg, load -> PBLENDWrmi with immediate 3; the load is matched as a
//                 memopv2i64 wrapped in bc_v4i32.
//  - load, reg -> PBLENDWrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 0xfc, the 8-bit complement of 3.
// NOTE(review): these use memopv2i64 rather than loadv2i64; presumably the
// memop fragment restricts which loads may be folded (e.g. by alignment) --
// confirm in the fragment's definition.
// NOTE(review): immediate 3 presumably covers the two 16-bit words that make
// up the low 32-bit element -- confirm against the PBLENDW instruction
// reference.
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(PBLENDWrri VR128:$src1, VR128:$src2, (i8 3))>;
+ def : Pat<(v4i32 (X86Movss VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (i8 3))>;
+ def : Pat<(v4i32 (X86Movss (bc_v4i32 (memopv2i64 addr:$src2)), VR128:$src1)),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (i8 0xfc))>;
// v2f64 X86Movsd selected as BLENDPD.
//  - reg, reg  -> BLENDPDrri with immediate 1.
//  - reg, load -> BLENDPDrmi with immediate 1; the memopv2f64 becomes the
//                 instruction's address operand.
//  - load, reg -> BLENDPDrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 2, the 2-bit complement of 1.
// NOTE(review): these use memopv2f64 rather than loadv2f64; presumably the
// memop fragment restricts which loads may be folded (e.g. by alignment) --
// confirm in the fragment's definition.
// NOTE(review): the commuted form relies on blend-immediate bit i selecting
// lane i from the second source operand -- confirm against the BLENDPD
// instruction reference.
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
(BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
+ (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
+ def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
+ (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
// v2i64 X86Movsd selected as PBLENDW.
//  - reg, reg  -> PBLENDWrri with immediate 0xf.
//  - reg, load -> PBLENDWrmi with immediate 0xf; the memopv2i64 becomes the
//                 instruction's address operand.
//  - load, reg -> PBLENDWrmi with the register emitted first and the address
//                 second (swapped relative to the matched node), and the
//                 immediate changed to 0xf0, the 8-bit complement of 0xf.
// NOTE(review): these use memopv2i64 rather than loadv2i64; presumably the
// memop fragment restricts which loads may be folded (e.g. by alignment) --
// confirm in the fragment's definition.
// NOTE(review): immediate 0xf presumably covers the four 16-bit words that
// make up the low 64-bit element -- confirm against the PBLENDW instruction
// reference.
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
(PBLENDWrri VR128:$src1, VR128:$src2, (i8 0xf))>;
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, (memopv2i64 addr:$src2))),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (i8 0xf))>;
+ def : Pat<(v2i64 (X86Movsd (memopv2i64 addr:$src2), VR128:$src1)),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (i8 0xf0))>;
}