case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d: {
- SDValue ShAmt = getValue(I.getArgOperand(1));
- if (isa<ConstantSDNode>(ShAmt)) {
- visitTargetIntrinsic(I, Intrinsic);
- return;
- }
- unsigned NewIntrinsic = 0;
- EVT ShAmtVT = MVT::v2i32;
- switch (Intrinsic) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntrinsic = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntrinsic = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
-
- // The vector shift intrinsics with scalars uses 32b shift amounts but
- // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
- // to be zero.
- // We must do this early because v2i32 is not a legal type.
- SDValue ShOps[2];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
- ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
- EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
- DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
- getValue(I.getArgOperand(0)), ShAmt);
- setValue(&I, Res);
- return;
- }
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
MaskVT, Operation);
return DAG.getMergeValues({Result0, Result1}, DL);
}
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = Op.getOperand(2);
+ // If the argument is a constant, this is fine.
+ if (isa<ConstantSDNode>(ShAmt))
+ return Op;
+
+ unsigned NewIntrinsic;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ }
+
+ // The vector shift intrinsics with scalars use 32b shift amounts but
+ // the sse2/mmx shift instructions read 64 bits. Copy the 32 bits to an
+ // MMX register.
+ SDLoc DL(Op);
+ ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+ DAG.getConstant(NewIntrinsic, DL, MVT::i32),
+ Op.getOperand(1), ShAmt);
+
+ }
}
}
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movd 16(%ebp), %mm0
-; X86-NEXT: movq 8(%ebp), %mm1
-; X86-NEXT: psllq %mm0, %mm1
-; X86-NEXT: movq %mm1, (%esp)
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: movd 16(%ebp), %mm1
+; X86-NEXT: psllq %mm1, %mm0
+; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
;
; X64-LABEL: t1:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %esi, %mm0
-; X64-NEXT: movq %rdi, %mm1
-; X64-NEXT: psllq %mm0, %mm1
-; X64-NEXT: movq %mm1, %rax
+; X64-NEXT: movq %rdi, %mm0
+; X64-NEXT: movd %esi, %mm1
+; X64-NEXT: psllq %mm1, %mm0
+; X64-NEXT: movq %mm0, %rax
; X64-NEXT: retq
entry:
%0 = bitcast i64 %x to x86_mmx
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movd 16(%ebp), %mm0
-; X86-NEXT: movd 20(%ebp), %mm1
-; X86-NEXT: psllq %mm0, %mm1
-; X86-NEXT: por 8(%ebp), %mm1
-; X86-NEXT: movq %mm1, (%esp)
+; X86-NEXT: movd 20(%ebp), %mm0
+; X86-NEXT: movd 16(%ebp), %mm1
+; X86-NEXT: psllq %mm1, %mm0
+; X86-NEXT: por 8(%ebp), %mm0
+; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
;
; X64-LABEL: t2:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %esi, %mm0
-; X64-NEXT: movd %edx, %mm1
-; X64-NEXT: psllq %mm0, %mm1
-; X64-NEXT: movq %rdi, %mm0
-; X64-NEXT: por %mm1, %mm0
-; X64-NEXT: movq %mm0, %rax
+; X64-NEXT: movd %edx, %mm0
+; X64-NEXT: movd %esi, %mm1
+; X64-NEXT: psllq %mm1, %mm0
+; X64-NEXT: movq %rdi, %mm1
+; X64-NEXT: por %mm0, %mm1
+; X64-NEXT: movq %mm1, %rax
; X64-NEXT: retq
entry:
%0 = insertelement <2 x i32> undef, i32 %w, i32 0
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $1, (%esp)
-; X86-NEXT: movd (%esp), %mm0
; X86-NEXT: movl $255, %ecx
-; X86-NEXT: movd %ecx, %mm1
-; X86-NEXT: psrlq %mm0, %mm1
-; X86-NEXT: movq %mm1, (%eax)
+; X86-NEXT: movd %ecx, %mm0
+; X86-NEXT: movd (%esp), %mm1
+; X86-NEXT: psrlq %mm1, %mm0
+; X86-NEXT: movq %mm0, (%eax)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
; X64: # %bb.0: # %entry
; X64-NEXT: movl $1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movd -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: movl $255, %eax
-; X64-NEXT: movd %eax, %mm1
-; X64-NEXT: psrlq %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdi)
+; X64-NEXT: movd %eax, %mm0
+; X64-NEXT: movd -{{[0-9]+}}(%rsp), %mm1
+; X64-NEXT: psrlq %mm1, %mm0
+; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: retq
entry:
%0 = alloca i32, align 4