From 8561283b10f79b8bdb0772c401cf9b585f5f9bbb Mon Sep 17 00:00:00 2001 From: tianleli Date: Fri, 16 Jun 2023 09:15:43 +0800 Subject: [PATCH] [DAG] Unroll operand when its type is illegal for ldexp. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D152997 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 5 +- llvm/test/CodeGen/X86/ldexp.ll | 190 +++++++++++++++++++-- 3 files changed, 177 insertions(+), 20 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b7280a3..ece1201 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1009,7 +1009,7 @@ private: SDValue WidenVecOp_Convert(SDNode *N); SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N); - SDValue WidenVecOp_FCOPYSIGN(SDNode *N); + SDValue WidenVecOp_UnrollVectorOp(SDNode *N); SDValue WidenVecOp_IS_FPCLASS(SDNode *N); SDValue WidenVecOp_VECREDUCE(SDNode *N); SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1892f4c..21d7952 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5882,7 +5882,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; - case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; + case ISD::FLDEXP: + case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break; case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; case ISD::ANY_EXTEND: @@ -6031,7 +6032,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { } } -SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { +SDValue 
DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) { // The result (and first input) is legal, but the second input is illegal. // We can't do much to fix that, so just unroll and let the extracts off of // the second input be widened as needed later. diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll index bbf0f97..85ba8ef 100644 --- a/llvm/test/CodeGen/X86/ldexp.ll +++ b/llvm/test/CodeGen/X86/ldexp.ll @@ -674,6 +674,160 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ret <4 x float> %1 } +define <2 x double> @ldexp_v2f64(<2 x double> %val, <2 x i32> %exp) { +; X64-LABEL: ldexp_v2f64: +; X64: # %bb.0: +; X64-NEXT: subq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 64 +; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[1,1,1,1] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: addq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: ldexp_v2f64: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $28, %esp +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: 
movl %esi, {{[0-9]+}}(%esp) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: addl $28, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: retl + %1 = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %val, <2 x i32> %exp) + ret <2 x double> %1 +} + +define <4 x double> @ldexp_v4f64(<4 x double> %val, <4 x i32> %exp) { +; X64-LABEL: ldexp_v4f64: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: subq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 96 +; X64-NEXT: .cfi_offset %rbx, -24 +; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] +; X64-NEXT: movd %xmm1, %ebx +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] +; X64-NEXT: movd %xmm1, %ebp +; X64-NEXT: movd %xmm2, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: pshufd $85, (%rsp), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = mem[1,1,1,1] +; X64-NEXT: movd %xmm1, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: movl %ebp, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movl %ebx, %edi +; X64-NEXT: callq ldexp@PLT +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = xmm1[0],mem[0] +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: addq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: ldexp_v4f64: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: pushl %ebx +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $44, %esp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; WIN32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded 
Reload +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _ldexp +; WIN32-NEXT: fstpl 24(%esi) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fstpl 16(%esi) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fstpl 8(%esi) +; WIN32-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; WIN32-NEXT: fstpl (%esi) +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: addl $44, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl + %1 = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %val, <4 x i32> %exp) + ret <4 x double> %1 +} + define half @ldexp_f16(half %arg0, i32 %arg1) { ; X64-LABEL: ldexp_f16: ; X64: # %bb.0: @@ -699,66 +853,66 @@ define half @ldexp_f16(half %arg0, i32 %arg1) { ; WIN32-NEXT: movl %eax, (%esp) ; WIN32-NEXT: cmpl $381, %edi # imm = 0x17D ; WIN32-NEXT: movl %edi, %esi -; WIN32-NEXT: jl LBB4_2 +; WIN32-NEXT: jl LBB6_2 ; WIN32-NEXT: # %bb.1: ; WIN32-NEXT: movl $381, %esi # imm = 0x17D -; WIN32-NEXT: LBB4_2: +; WIN32-NEXT: LBB6_2: ; WIN32-NEXT: addl $-254, %esi ; WIN32-NEXT: calll ___gnu_h2f_ieee ; WIN32-NEXT: leal -127(%edi), %eax ; WIN32-NEXT: cmpl $255, %edi -; WIN32-NEXT: jae LBB4_4 +; WIN32-NEXT: jae LBB6_4 ; WIN32-NEXT: # %bb.3: ; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: LBB4_4: +; WIN32-NEXT: LBB6_4: ; WIN32-NEXT: flds __real@7f000000 ; WIN32-NEXT: fld %st(1) ; WIN32-NEXT: fmul %st(1), %st ; WIN32-NEXT: fmul %st, %st(1) -; WIN32-NEXT: jae LBB4_6 +; WIN32-NEXT: jae LBB6_6 ; WIN32-NEXT: # %bb.5: ; WIN32-NEXT: fstp %st(1) ; WIN32-NEXT: fldz -; WIN32-NEXT: LBB4_6: +; WIN32-NEXT: LBB6_6: ; WIN32-NEXT: fstp %st(0) ; WIN32-NEXT: cmpl $-329, %edi # imm = 0xFEB7 ; WIN32-NEXT: movl %edi, %eax -; WIN32-NEXT: jge LBB4_8 +; WIN32-NEXT: jge LBB6_8 ; WIN32-NEXT: # %bb.7: ; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6 -; WIN32-NEXT: LBB4_8: +; WIN32-NEXT: LBB6_8: ; WIN32-NEXT: flds __real@0c800000 ; WIN32-NEXT: fld 
%st(2) ; WIN32-NEXT: fmul %st(1), %st ; WIN32-NEXT: fmul %st, %st(1) ; WIN32-NEXT: cmpl $-228, %edi -; WIN32-NEXT: jb LBB4_9 +; WIN32-NEXT: jb LBB6_9 ; WIN32-NEXT: # %bb.10: ; WIN32-NEXT: fstp %st(1) ; WIN32-NEXT: leal 102(%edi), %eax ; WIN32-NEXT: cmpl $-126, %edi -; WIN32-NEXT: jge LBB4_12 -; WIN32-NEXT: jmp LBB4_13 -; WIN32-NEXT: LBB4_9: +; WIN32-NEXT: jge LBB6_12 +; WIN32-NEXT: jmp LBB6_13 +; WIN32-NEXT: LBB6_9: ; WIN32-NEXT: fstp %st(0) ; WIN32-NEXT: addl $204, %eax ; WIN32-NEXT: cmpl $-126, %edi -; WIN32-NEXT: jl LBB4_13 -; WIN32-NEXT: LBB4_12: +; WIN32-NEXT: jl LBB6_13 +; WIN32-NEXT: LBB6_12: ; WIN32-NEXT: fstp %st(0) ; WIN32-NEXT: movl %edi, %eax ; WIN32-NEXT: fldz ; WIN32-NEXT: fxch %st(2) -; WIN32-NEXT: LBB4_13: +; WIN32-NEXT: LBB6_13: ; WIN32-NEXT: fstp %st(2) ; WIN32-NEXT: cmpl $127, %edi -; WIN32-NEXT: jg LBB4_15 +; WIN32-NEXT: jg LBB6_15 ; WIN32-NEXT: # %bb.14: ; WIN32-NEXT: fstp %st(0) ; WIN32-NEXT: movl %eax, %esi ; WIN32-NEXT: fldz ; WIN32-NEXT: fxch %st(1) -; WIN32-NEXT: LBB4_15: +; WIN32-NEXT: LBB6_15: ; WIN32-NEXT: fstp %st(1) ; WIN32-NEXT: shll $23, %esi ; WIN32-NEXT: addl $1065353216, %esi # imm = 0x3F800000 @@ -778,6 +932,8 @@ declare double @llvm.ldexp.f64.i32(double, i32) #0 declare float @llvm.ldexp.f32.i32(float, i32) #0 declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0 declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0 +declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0 +declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>) #0 declare half @llvm.ldexp.f16.i32(half, i32) #0 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -- 2.7.4