KnownBits KnownOp;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
+
+ // Don't mask bits on 32-bit AVX512 targets which might lose a broadcast.
// FIXME: Can we bound this better?
APInt DemandedMask = APInt::getLowBitsSet(64, 32);
- if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp,
- TLO, Depth + 1))
+ APInt DemandedMaskLHS = APInt::getAllOnes(64);
+ APInt DemandedMaskRHS = APInt::getAllOnes(64);
+
+ bool Is32BitAVX512 = !Subtarget.is64Bit() && Subtarget.hasAVX512();
+ if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(LHS))
+ DemandedMaskLHS = DemandedMask;
+ if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(RHS))
+ DemandedMaskRHS = DemandedMask;
+
+ if (SimplifyDemandedBits(LHS, DemandedMaskLHS, OriginalDemandedElts,
+ KnownOp, TLO, Depth + 1))
return true;
- if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp,
- TLO, Depth + 1))
+ if (SimplifyDemandedBits(RHS, DemandedMaskRHS, OriginalDemandedElts,
+ KnownOp, TLO, Depth + 1))
return true;
// Aggressively peek through ops to get at the demanded low bits.
SDValue DemandedLHS = SimplifyMultipleUseDemandedBits(
- LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ LHS, DemandedMaskLHS, OriginalDemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedRHS = SimplifyMultipleUseDemandedBits(
- RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ RHS, DemandedMaskRHS, OriginalDemandedElts, TLO.DAG, Depth + 1);
if (DemandedLHS || DemandedRHS) {
DemandedLHS = DemandedLHS ? DemandedLHS : LHS;
DemandedRHS = DemandedRHS ? DemandedRHS : RHS;
; X86-LABEL: test_mask_mul_epi32_rmbk_buildvector:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastd (%eax), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x10]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpmuldq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X86-LABEL: test_mask_mul_epi32_rmbkz_buildvector:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpmuldq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_buildvector:
; X86-LABEL: test_mask_mul_epi32_rmbk_128_buildvector:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastd (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x10]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vpmuldq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xca]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpmuldq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X86-LABEL: test_mask_mul_epi32_rmbkz_128_buildvector:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpmuldq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_128_buildvector:
; X86-LABEL: test_mask_mul_epi32_rmbk_256_buildvector:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastd (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x10]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vpmuldq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xca]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpmuldq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X86-LABEL: test_mask_mul_epi32_rmbkz_256_buildvector:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpmuldq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_256_buildvector: