    if (TLI->isCommutativeBinOp(Opcode))
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
        return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
-
-    // If this is a bitwise logic opcode see if we can fold bitcasted ops.
-    // TODO: Can we generalize this and fold any bitcasted constant data?
-    if (ISD::isBitwiseLogicOp(Opcode) && Ops[0].getOpcode() == ISD::BITCAST &&
-        Ops[1].getOpcode() == ISD::BITCAST) {
-      SDValue InnerN1 = peekThroughBitcasts(Ops[0].getOperand(0));
-      SDValue InnerN2 = peekThroughBitcasts(Ops[1].getOperand(0));
-      EVT InnerVT = InnerN1.getValueType();
-      if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
-        if (SDValue C =
-                FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
-          return getBitcast(VT, C);
-    }
  }

  // This is for vector folding only from here on.
  ElementCount NumElts = VT.getVectorElementCount();
+  // See if we can fold through bitcasted integer ops.
+  // TODO: Can we handle undef elements?
+  if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
+      Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+      Ops[0].getOpcode() == ISD::BITCAST &&
+      Ops[1].getOpcode() == ISD::BITCAST) {
+    SDValue N1 = peekThroughBitcasts(Ops[0]);
+    SDValue N2 = peekThroughBitcasts(Ops[1]);
+    auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+    auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+    EVT BVVT = N1.getValueType();
+    if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+      bool IsLE = getDataLayout().isLittleEndian();
+      unsigned EltBits = VT.getScalarSizeInBits();
+      SmallVector<APInt> RawBits1, RawBits2;
+      BitVector UndefElts1, UndefElts2;
+      if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
+          BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
+          UndefElts1.none() && UndefElts2.none()) {
+        SmallVector<APInt> RawBits;
+        for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
+          Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+          if (!Fold)
+            break;
+          RawBits.push_back(Fold.getValue());
+        }
+        if (RawBits.size() == NumElts.getFixedValue()) {
+          // We have constant folded, but we need to recast the folded bits
+          // back to the original (possibly legalized) build vector type.
+          SmallVector<APInt> DstBits;
+          BitVector DstUndefs;
+          BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
+                                           DstBits, RawBits, DstUndefs,
+                                           BitVector(RawBits.size(), false));
+          EVT BVEltVT = BV1->getOperand(0).getValueType();
+          unsigned BVEltBits = BVEltVT.getSizeInBits();
+          SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
+          for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
+            if (DstUndefs[I])
+              continue;
+            Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+          }
+          return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
+        }
+      }
+    }
+  }
+
  auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
    return !Op.getValueType().isVector() ||
           Op.getValueType().getVectorElementCount() == NumElts;
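
// Illustrative aside (not part of the patch): a minimal, standalone sketch of
// the idea behind the new fold above, assuming little-endian layout and using
// hypothetical helper names (recastToWide / recastToNarrow). The DAG code does
// the equivalent via BuildVectorSDNode::getConstantRawBits / recastRawBits:
// view the bitcasted v4i32 constant operands as raw v2i64 lanes, fold the
// binop on those lanes, then recast the folded bits back to the original
// element width so they can be rebuilt as a build vector and bitcast to VT.
#include <array>
#include <cstdint>
#include <cstdio>

// Reinterpret four 32-bit constant elements as two 64-bit lanes (little-endian:
// element 0 supplies the low half of lane 0).
static std::array<uint64_t, 2> recastToWide(const std::array<uint32_t, 4> &V) {
  std::array<uint64_t, 2> W;
  for (int I = 0; I != 2; ++I)
    W[I] = uint64_t(V[2 * I]) | (uint64_t(V[2 * I + 1]) << 32);
  return W;
}

// Split two 64-bit lanes back into four 32-bit elements.
static std::array<uint32_t, 4> recastToNarrow(const std::array<uint64_t, 2> &W) {
  std::array<uint32_t, 4> V;
  for (int I = 0; I != 2; ++I) {
    V[2 * I] = uint32_t(W[I]);
    V[2 * I + 1] = uint32_t(W[I] >> 32);
  }
  return V;
}

int main() {
  // Two v4i32 build vectors that are bitcast to v2i64 before an i64 add.
  std::array<uint32_t, 4> A = {0xffffffffu, 0x0u, 0x1u, 0x2u};
  std::array<uint32_t, 4> B = {0x1u, 0x0u, 0xffffffffu, 0x0u};

  // Fold the add on the wide (v2i64) lanes...
  std::array<uint64_t, 2> WA = recastToWide(A), WB = recastToWide(B);
  std::array<uint64_t, 2> Folded = {WA[0] + WB[0], WA[1] + WB[1]};

  // ...and recast the result back to the original v4i32 element width.
  std::array<uint32_t, 4> R = recastToNarrow(Folded);
  for (uint32_t E : R)
    printf("0x%08x ", (unsigned)E);
  printf("\n"); // expected: 0x00000000 0x00000001 0x00000000 0x00000003
  return 0;
}
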
; X86-SSE-NEXT: psllq $63, %xmm1
; X86-SSE-NEXT: psllq $63, %xmm0
; X86-SSE-NEXT: psrlq $63, %xmm0
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,0,0,0]
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,0,0,2147483648]
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: psubq %xmm2, %xmm0
; X86-SSE-NEXT: psrlq $63, %xmm1
; X86-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
; X86-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [1,0,0,2147483648,1,0,0,2147483648]
+; X86-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
+; X86-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pxor %xmm1, %xmm1
-; X86-SSE2-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlq $60, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT: psrlq %xmm1, %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X86-SSE2-NEXT: psrlq %xmm1, %xmm3
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
+; X86-SSE2-NEXT: psrlq $50, %xmm2
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq $4, %xmm1
; X86-SSE2-NEXT: psllq $14, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE2-NEXT: orpd %xmm3, %xmm0
+; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
ret <2 x i64> %res
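
// Illustrative check (not part of the patch): in the constant fshl/fshr tests
// above and below, the complementary shift amount 64 - c (previously computed
// at run time via pxor/psubq/pand of the shift-amount constant) is now folded
// to an immediate, which is where the psrlq $60 / psrlq $50 and psllq $60 /
// psllq $50 immediates in the new CHECK lines come from. A sample value
// confirms the rotate identity the lowering relies on.
#include <cstdint>
static_assert(64 - 4 == 60 && 64 - 14 == 50, "folded complementary amounts");

// fshl(x, x, c) on i64 is a left rotate: (x << c) | (x >> (64 - c)).
constexpr uint64_t rotl64(uint64_t X, unsigned C) {
  return (X << C) | (X >> (64 - C));
}
static_assert(rotl64(0x8000000000000001ULL, 4) == 0x0000000000000018ULL,
              "rotl by 4 matches the psllq $4 / psrlq $60 pair");
int main() { return 0; }
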
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psllq $14, %xmm1
-; X86-SSE2-NEXT: psrlq $50, %xmm0
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
-; X86-SSE2-NEXT: orpd %xmm1, %xmm0
+; X86-SSE2-NEXT: psrlq $50, %xmm1
+; X86-SSE2-NEXT: psllq $14, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 14, i64 14>)
ret <2 x i64> %res
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pxor %xmm1, %xmm1
-; X86-SSE2-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psllq $60, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT: psllq %xmm1, %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X86-SSE2-NEXT: psllq %xmm1, %xmm3
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
+; X86-SSE2-NEXT: psllq $50, %xmm2
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrlq $4, %xmm1
; X86-SSE2-NEXT: psrlq $14, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE2-NEXT: orpd %xmm3, %xmm0
+; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
ret <2 x i64> %res
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psrlq $14, %xmm1
-; X86-SSE2-NEXT: psllq $50, %xmm0
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
-; X86-SSE2-NEXT: orpd %xmm1, %xmm0
+; X86-SSE2-NEXT: psllq $50, %xmm1
+; X86-SSE2-NEXT: psrlq $14, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 14, i64 14>)
ret <2 x i64> %res
; X86-SSE-NEXT: psrlq $1, %xmm1
; X86-SSE-NEXT: psrlq $7, %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,7.2911220195563975E-304]
+; X86-SSE-NEXT: movapd {{.*#+}} xmm1 = [0,1073741824,0,16777216]
; X86-SSE-NEXT: xorpd %xmm1, %xmm0
; X86-SSE-NEXT: psubq %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX2-LABEL: constant_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,7,0,31,0,62,0]
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1073741824,0,16777216,0,1,2,0]
+; X86-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
ret <4 x i64> %shift
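
// Illustrative check (not part of the patch), verifying the pre-folded ymm1
// constant in the X86-AVX2 block above: it is the i64 sign-bit mask shifted
// right by each lane's shift amount <1, 7, 31, 62>, which the old code
// computed at run time with vpsrlvq on [0,2147483648,...]. The messages give
// each lane as the little-endian 32-bit pair that appears in the CHECK line.
#include <cstdint>
static_assert((UINT64_C(0x8000000000000000) >> 1) == 0x4000000000000000,
              "lane 0 -> [0,1073741824]");
static_assert((UINT64_C(0x8000000000000000) >> 7) == 0x0100000000000000,
              "lane 1 -> [0,16777216]");
static_assert((UINT64_C(0x8000000000000000) >> 31) == 0x0000000100000000,
              "lane 2 -> [0,1]");
static_assert((UINT64_C(0x8000000000000000) >> 62) == 0x0000000000000002,
              "lane 3 -> [2,0]");
int main() { return 0; }
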