[x86] scalarize extractelement 0 of FP vselect

author Sanjay Patel <spatel@rotateright.com>

Tue, 12 Mar 2019 19:20:45 +0000 (19:20 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 12 Mar 2019 19:20:45 +0000 (19:20 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 12 Mar 2019 19:20:45 +0000 (19:20 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 12 Mar 2019 19:20:45 +0000 (19:20 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 08061a1bcc51289aa5043b708429b68fd48e7015..e8d47c62b31738ba89c9cac42199924c45a33fa6 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34389,6 +34389,24 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
    if (VT != MVT::f32 && VT != MVT::f64)
      return SDValue();
  
+  // Vector FP selects don't fit the pattern of FP math ops (because the
+  // condition has a different type and we have to change the opcode), so deal
+  // with those here.
+  if (Vec.getOpcode() == ISD::VSELECT &&
+      Vec.getOperand(0).getOpcode() == ISD::SETCC &&
+      Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
+    // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
+    SDLoc DL(ExtElt);
+    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                               Vec.getOperand(0).getValueType().getScalarType(),
+                               Vec.getOperand(0), Index);
+    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                               Vec.getOperand(1), Index);
+    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                               Vec.getOperand(2), Index);
+    return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+  }
+
    // TODO: This switch could include FNEG and the x86-specific FP logic ops
    // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid 
    // missed load folding and fma+fneg combining.
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll

index 206348eb2081e2eec1fa524a51918bf518e9fc83..d77671c518d477ab0d0016a87cbf8290528dceef 100644 (file)
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -155,7 +155,7 @@ define i1 @fcmp_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
  define float @select_fcmp_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) nounwind {
  ; CHECK-LABEL: select_fcmp_v4f32:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpneq_oqps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vcmpneq_oqss %xmm1, %xmm0, %xmm0
  ; CHECK-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
  ; CHECK-NEXT:    retq
    %c = fcmp one <4 x float> %x, %y
@@ -167,9 +167,8 @@ define float @select_fcmp_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z,
  define double @select_fcmp_v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z, <4 x double> %w) nounwind {
  ; CHECK-LABEL: select_fcmp_v4f64:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpnltpd %ymm0, %ymm1, %ymm0
-; CHECK-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
-; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT:    vcmpnltsd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
  ; CHECK-NEXT:    vzeroupper
  ; CHECK-NEXT:    retq
    %c = fcmp ule <4 x double> %x, %y
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 12 Mar 2019 19:20:45 +0000 (19:20 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 12 Mar 2019 19:20:45 +0000 (19:20 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/extractelement-fp.ll		patch | blob | history