From fed432523edfb29db0c4e28552695446d8cc4b1b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 3 Jul 2020 22:08:28 -0700 Subject: [PATCH] [X86] Directly emit VPTERNLOG from canonicalizeBitSelect when possible. This seems to produce better results on some rotate tests and is neutral for other tests. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 ++++++++++ llvm/test/CodeGen/X86/min-legal-vector-width.ll | 8 +++----- llvm/test/CodeGen/X86/vector-rotate-256.ll | 6 ++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0b84635..1146b61 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42782,6 +42782,16 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, } SDLoc DL(N); + + if (UseVPTERNLOG) { + // Emit a VPTERNLOG node directly. + SDValue A = DAG.getBitcast(VT, N0.getOperand(1)); + SDValue B = DAG.getBitcast(VT, N0.getOperand(0)); + SDValue C = DAG.getBitcast(VT, N1.getOperand(0)); + SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8); + return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm); + } + SDValue X = N->getOperand(0); SDValue Y = DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)), diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 1807646..2980a16 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1786,12 +1786,10 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind "min-legal-v define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: splatconstant_rotate_mask_v32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = 
[240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2 -; CHECK-NEXT: vpand %ymm1, %ymm2, %ymm2 +; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0 -; CHECK-NEXT: vpandn %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm2, %ymm0 +; CHECK-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; CHECK-NEXT: retq %shl = shl <32 x i8> %a, %lshr = lshr <32 x i8> %a, diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index d27d398..31fe575 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -1802,11 +1802,9 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind { ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpandn %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i8: -- 2.7.4