SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // If this is a store of a scalar_to_vector to v1i1, just use a scalar store.
+  // This will avoid a copy to a k-register.
+  if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
+      StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      StoredVal.getOperand(0).getValueType() == MVT::i8) {
+    return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
+                        St->getBasePtr(), St->getPointerInfo(),
+                        St->getAlignment(), St->getMemOperand()->getFlags());
+  }
+
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
bool Fast;
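
For context, a minimal sketch of the IR shape this combine targets (function and value names here are illustrative, not copied from the test file): a <1 x i8> load truncated to <1 x i1> and stored back. Before this change the <1 x i1> store round-tripped through a k-register via kmovb; with the combine it becomes a plain byte load/store, as the updated checks below show.

define void @trunc_store_v1i1_sketch(<1 x i8>* %src, <1 x i1>* %dst) {
  %v = load <1 x i8>, <1 x i8>* %src   ; load the source byte as <1 x i8>
  %t = trunc <1 x i8> %v to <1 x i1>   ; truncate to the v1i1 mask type
  store <1 x i1> %t, <1 x i1>* %dst    ; now selected as movb instead of kmovb
  ret void
}
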
define void @load_v1i2_trunc_v1i1_store(<1 x i2>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i2_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store:
define void @load_v1i3_trunc_v1i1_store(<1 x i3>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i3_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store:
define void @load_v1i4_trunc_v1i1_store(<1 x i4>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i4_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store:
define void @load_v1i8_trunc_v1i1_store(<1 x i8>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i8_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store:
define void @load_v1i16_trunc_v1i1_store(<1 x i16>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i16_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store:
define void @load_v1i32_trunc_v1i1_store(<1 x i32>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i32_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store:
define void @load_v1i64_trunc_v1i1_store(<1 x i64>* %a0,<1 x i1>* %a1) {
; AVX512-ALL-LABEL: load_v1i64_trunc_v1i1_store:
; AVX512-ALL: # %bb.0:
-; AVX512-ALL-NEXT: kmovb (%rdi), %k0
-; AVX512-ALL-NEXT: kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT: movb (%rdi), %al
+; AVX512-ALL-NEXT: movb %al, (%rsi)
; AVX512-ALL-NEXT: retq
;
; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store: