// If the bits are not zero we have to fall back to explicitly zeroing by
// using shifts.
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512, NoDQI] in { // Shift-based zeroing of bits 15:8; restricted to NoDQI because the HasDQI variant of this pattern selects KMOVBkk instead.
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
(i8 8)), (i8 8))>;
}
+let Predicates = [HasDQI] in { // With DQI, inserting a v8i1 mask into an all-zeros v16i1 is selected as a single KMOVBkk.
+ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+ (v8i1 VK8:$mask), (iPTR 0))),
+ (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>; // NOTE(review): relies on KMOVB clearing destination bits above bit 7 (per Intel SDM) - confirm the move cannot be folded away as a plain copy.
+}
+
let Predicates = [HasVLX, HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
}
let Predicates = [HasBWI] in {
+ def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+ (v16i1 VK16:$mask), (iPTR 0))),
+ (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>; // v16i1 -> v32i1: KMOVW replaces the former KSHIFTLD/KSHIFTRD-by-16 pair.
+
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ (v16i1 VK16:$mask), (iPTR 0))),
+ (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>; // v16i1 -> v64i1: KMOVW replaces the former KSHIFTLQ/KSHIFTRQ-by-48 pair.
+ def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+ (v32i1 VK32:$mask), (iPTR 0))),
+ (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>; // v32i1 -> v64i1: KMOVD replaces the former KSHIFTLQ/KSHIFTRQ-by-32 pair. NOTE(review): all three patterns assume KMOVW/KMOVD zero the upper destination bits (Intel SDM) - confirm.
+}
+
+let Predicates = [HasBWI, NoDQI] in { // v8i1 sources keep the shift-pair zeroing when DQI is absent; the v16i1 source case removed below is now handled by KMOVWkk under plain HasBWI.
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
(i8 24)), (i8 24))>;
- def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
- (v16i1 VK16:$mask), (iPTR 0))),
- (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK16:$mask, VK32),
- (i8 16)), (i8 16))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
(i8 56)), (i8 56))>;
+}
+
+let Predicates = [HasBWI, HasDQI] in { // With both BWI and DQI, v8i1 sources are widened to v32i1/v64i1 with a single KMOVBkk instead of a shift pair.
+ def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+ (v8i1 VK8:$mask), (iPTR 0))),
+ (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>; // v8i1 -> v32i1; NOTE(review): assumes KMOVB zeroes bits above bit 7 (Intel SDM) - confirm.
+
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
- (v16i1 VK16:$mask), (iPTR 0))),
- (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK16:$mask, VK64),
- (i8 48)), (i8 48))>;
- def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
- (v32i1 VK32:$mask), (iPTR 0))),
- (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK32:$mask, VK64),
- (i8 32)), (i8 32))>;
+ (v8i1 VK8:$mask), (iPTR 0))),
+ (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>; // v8i1 -> v64i1 via the same KMOVBkk zero-extension.
}
let Predicates = [HasBWI, HasVLX] in {
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kshiftlq $48, %k0, %k0
-; CHECK-NEXT: kshiftrq $48, %k0, %k1
+; CHECK-NEXT: kmovw %k0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; CHECK-NEXT: retq
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kshiftlq $32, %k0, %k0
-; CHECK-NEXT: kshiftrq $32, %k0, %k1
+; CHECK-NEXT: kmovd %k0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0
; CHECK-NEXT: retq
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kshiftld $16, %k0, %k0
-; CHECK-NEXT: kshiftrd $16, %k0, %k1
+; CHECK-NEXT: kmovw %k0, %k1
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0
; CHECK-NEXT: retq
; CHECK-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kshiftlq $48, %k0, %k0
-; CHECK-NEXT: kshiftrq $48, %k0, %k1
+; CHECK-NEXT: kmovw %k0, %k1
; CHECK-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
; CHECK-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kshiftlq $32, %k0, %k0
-; CHECK-NEXT: kshiftrq $32, %k0, %k1
+; CHECK-NEXT: kmovd %k0, %k1
; CHECK-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
; CHECK-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
-; CHECK-NEXT: kshiftld $16, %k0, %k0
-; CHECK-NEXT: kshiftrd $16, %k0, %k1
+; CHECK-NEXT: kmovw %k0, %k1
; CHECK-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
; CHECK-NEXT: kunpckdq %k0, %k1, %k0
; CHECK-NEXT: movl $1, %ecx
; CHECK-NEXT: kmovd %ecx, %k1
-; CHECK-NEXT: kshiftlq $32, %k1, %k1
-; CHECK-NEXT: kshiftrq $32, %k1, %k1
+; CHECK-NEXT: kmovd %k1, %k1
; CHECK-NEXT: kandq %k1, %k0, %k1
; CHECK-NEXT: vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z}
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1