Fixes the issue identified in #63980 where undef rotate amounts (introduced during widening from v2i32 -> v4i32) were being constant folded to 0 when the shift amounts were created during expansion, losing the splat'd shift amounts.
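To illustrate (this is the scenario the splatconstant_funnnel_v2i32 tests below exercise; the reasoning is inferred from the removed CHECK lines): the <2 x i32> rotate amount <i32 4, i32 4> is widened to v4i32 as <4, 4, undef, undef>. When the SHL/SRL amounts were created during expansion, the undef lanes folded to 0, yielding the non-splat <4, 4, 0, 0>; lowering then had to treat the amount as per-lane, producing the variable vpsrlvd/vpsllvd pair on AVX2 and an extra blend (movsd/pblendw) on SSE to leave lanes 2-3 untouched.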
}
// Rotate by a uniform constant - expand back to shifts.
- if (IsCstSplat)
- return SDValue();
+ // TODO: Can't use generic expansion as UNDEF amt elements can be converted
+ // to other values when folded to shift amounts, losing the splat.
+ if (IsCstSplat) {
+ uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
+ uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
+ uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R,
+ DAG.getShiftAmountConstant(ShlAmt, VT, DL));
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R,
+ DAG.getShiftAmountConstant(SrlAmt, VT, DL));
+ return DAG.getNode(ISD::OR, DL, VT, Shl, Srl);
+ }
// Split 512-bit integers on non 512-bit BWI targets.
if (VT.is512BitVector() && !Subtarget.useBWIRegs())
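As a sanity check of the ShlAmt/SrlAmt mapping above, here is a minimal standalone C++ sketch (a hypothetical model for illustration, not LLVM API) of the same computation for 32-bit elements; it reproduces the shift pairs in the updated checks below: rotate-left by 4 becomes SHL 4 | SRL 28 (pslld $4 / psrld $28), and rotate-right by 4 becomes SHL 28 | SRL 4 (pslld $28 / psrld $4).

#include <cassert>
#include <cstdint>

// Hypothetical standalone model of the expansion above (not LLVM API):
// ROTL(x, r) == (x << r) | (x >> (32 - r)); ROTR swaps the two amounts.
static uint32_t rotateByConstant(uint32_t X, unsigned Amt, bool IsROTL) {
  const unsigned EltSizeInBits = 32;
  unsigned RotAmt = Amt % EltSizeInBits;
  if (RotAmt == 0)
    return X; // avoid shift-by-32, which is undefined in C++
  unsigned ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
  unsigned SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
  return (X << ShlAmt) | (X >> SrlAmt);
}

int main() {
  // fshl(x, x, 4) on each lane == ROTL by 4: pslld $4 / psrld $28.
  assert(rotateByConstant(0x80000001u, 4, /*IsROTL=*/true) == 0x18u);
  // fshr(x, x, 4) on each lane == ROTR by 4: pslld $28 / psrld $4.
  assert(rotateByConstant(0x18u, 4, /*IsROTL=*/false) == 0x80000001u);
  return 0;
}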
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $28, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pslld $4, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: pslld $4, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $28, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: pslld $4, %xmm2
-; SSE41-NEXT: por %xmm1, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pslld $4, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $28, %xmm0, %xmm1
-; AVX1-NEXT: vpslld $4, %xmm0, %xmm2
-; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $28, %xmm0, %xmm1
+; AVX2-NEXT: vpslld $4, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $28, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT: pslld $4, %xmm2
-; X86-SSE2-NEXT: por %xmm1, %xmm2
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X86-SSE2-NEXT: pslld $4, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
ret <2 x i32> %res
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $4, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pslld $28, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: pslld $28, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $4, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: pslld $28, %xmm2
-; SSE41-NEXT: por %xmm1, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pslld $28, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
-; AVX1-NEXT: vpslld $28, %xmm0, %xmm2
-; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $4, %xmm0, %xmm1
+; AVX2-NEXT: vpslld $28, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $4, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT: pslld $28, %xmm2
-; X86-SSE2-NEXT: por %xmm1, %xmm2
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X86-SSE2-NEXT: pslld $28, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
ret <2 x i32> %res