From: Bruno Cardoso Lopes Date: Mon, 23 Feb 2015 15:23:14 +0000 (+0000) Subject: [X86][MMX] Support folding loads in psll, psrl and psra intrinsics X-Git-Tag: llvmorg-3.7.0-rc1~11148 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9e1c4c17d92d93d7bdc183d9f85a3a795e32e80a;p=platform%2Fupstream%2Fllvm.git [X86][MMX] Support folding loads in psll, psrl and psra intrinsics llvm-svn: 230225 --- diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 25058a2..a80843f 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -27,6 +27,8 @@ def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1, //===----------------------------------------------------------------------===// def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>; +def load_mvmmx : PatFrag<(ops node:$ptr), + (x86mmx (MMX_X86movw2d (load node:$ptr)))>; def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 5a2d314..ed2822d 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -471,6 +471,13 @@ defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_mmx_psrl_q, int_x86_mmx_psrli_q, MMX_SHIFT_ITINS>; +def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSRLWrm VR64:$src1, addr:$src2)>; +def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSRLDrm VR64:$src1, addr:$src2)>; +def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSRLQrm VR64:$src1, addr:$src2)>; + defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_mmx_psll_w, int_x86_mmx_pslli_w, MMX_SHIFT_ITINS>; @@ -481,6 +488,13 @@ defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_mmx_psll_q, int_x86_mmx_pslli_q, MMX_SHIFT_ITINS>; +def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSLLWrm VR64:$src1, addr:$src2)>; +def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSLLDrm VR64:$src1, addr:$src2)>; +def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSLLQrm VR64:$src1, addr:$src2)>; + defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_mmx_psra_w, int_x86_mmx_psrai_w, MMX_SHIFT_ITINS>; @@ -488,6 +502,11 @@ defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_mmx_psra_d, int_x86_mmx_psrai_d, MMX_SHIFT_ITINS>; +def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSRAWrm VR64:$src1, addr:$src2)>; +def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)), + (MMX_PSRADrm VR64:$src1, addr:$src2)>; + // Comparison Instructions defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b, MMX_INTALU_ITINS>; diff --git a/llvm/test/CodeGen/X86/mmx-fold-load.ll b/llvm/test/CodeGen/X86/mmx-fold-load.ll index 861706f..71209e1 100644 --- a/llvm/test/CodeGen/X86/mmx-fold-load.ll +++ b/llvm/test/CodeGen/X86/mmx-fold-load.ll @@ -4,8 +4,7 @@ define i64 @t0(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t0: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psllq %mm1, %mm0 +; CHECK-NEXT: psllq (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -22,8 +21,7 @@ define i64 @t1(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t1: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psrlq %mm1, %mm0 +; CHECK-NEXT: psrlq (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -40,8 +38,7 @@ define i64 @t2(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t2: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psllw %mm1, %mm0 +; CHECK-NEXT: psllw (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -58,8 +55,7 @@ define i64 @t3(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t3: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psrlw %mm1, %mm0 +; CHECK-NEXT: psrlw (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -76,8 +72,7 @@ define i64 @t4(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t4: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: pslld %mm1, %mm0 +; CHECK-NEXT: pslld (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -94,8 +89,7 @@ define i64 @t5(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t5: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psrld %mm1, %mm0 +; CHECK-NEXT: psrld (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -112,8 +106,7 @@ define i64 @t6(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t6: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psraw %mm1, %mm0 +; CHECK-NEXT: psraw (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: @@ -130,8 +123,7 @@ define i64 @t7(<1 x i64>* %a, i32* %b) { ; CHECK-LABEL: t7: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd (%rsi), %mm1 -; CHECK-NEXT: psrad %mm1, %mm0 +; CHECK-NEXT: psrad (%rsi), %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: