i386.md (sse_mov?fcc*): New patterns and splitters.
authorJan Hubicka <jh@suse.cz>
Wed, 28 Feb 2001 18:29:14 +0000 (19:29 +0100)
committerJan Hubicka <hubicka@gcc.gnu.org>
Wed, 28 Feb 2001 18:29:14 +0000 (18:29 +0000)
* i386.md (sse_mov?fcc*): New patterns and splitters.
* i386.c (ix86_expand_movcc): Work post-reload; recognize
the SSE based conditional moves.

From-SVN: r40133

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.md

index e42a406..8ed7bbe 100644 (file)
@@ -1,3 +1,9 @@
+Wed Feb 28 19:28:06 CET 2001  Jan Hubicka  <jh@suse.cz>
+
+       * i386.md (sse_mov?fcc*): New patterns and splitters.
+       * i386.c (ix86_expand_movcc): Work post-reload; recognize
+       the SSE based conditional moves.
+
 Wed Feb 28 19:18:23 CET 2001  Jan Hubicka  <jh@suse.cz>
 
        * i386.md (attribute mode): Add "TI".
index 88ba6d1..da323ab 100644 (file)
@@ -6016,6 +6016,92 @@ ix86_expand_fp_movcc (operands)
   rtx tmp;
   rtx compare_op, second_test, bypass_test;
 
+  /* For SF/DFmode conditional moves based on comparisons
+     in same mode, we may want to use SSE min/max instructions.  */
+  if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
+       || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
+      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
+      /* We may be called from the post-reload splitter.  */
+      && (!REG_P (operands[0])
+         || SSE_REG_P (operands[0])
+         || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
+    {
+      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
+      code = GET_CODE (operands[1]);
+
+      /* See if we have (cross) match between comparison operands and
+         conditional move operands.  */
+      if (rtx_equal_p (operands[2], op1))
+       {
+         rtx tmp = op0;
+         op0 = op1;
+         op1 = tmp;
+         code = reverse_condition_maybe_unordered (code);
+       }
+      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
+       {
+         /* Check for min operation.  */
+         if (code == LT)
+           {
+              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
+              if (memory_operand (op0, VOIDmode))
+                op0 = force_reg (GET_MODE (operands[0]), op0);
+              if (GET_MODE (operands[0]) == SFmode)
+                emit_insn (gen_minsf3 (operands[0], op0, op1));
+              else
+                emit_insn (gen_mindf3 (operands[0], op0, op1));
+              return 1;
+           }
+         /* Check for max operation.  */
+         if (code == GT)
+           {
+              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
+              if (memory_operand (op0, VOIDmode))
+                op0 = force_reg (GET_MODE (operands[0]), op0);
+              if (GET_MODE (operands[0]) == SFmode)
+                emit_insn (gen_maxsf3 (operands[0], op0, op1));
+              else
+                emit_insn (gen_maxdf3 (operands[0], op0, op1));
+              return 1;
+           }
+       }
+      /* Manage condition to be sse_comparison_operator.  In case we are
+        in non-ieee mode, try to canonicalize the destination operand
+        to be first in the comparison - this helps reload to avoid extra
+        moves.  */
+      if (!sse_comparison_operator (operands[1], VOIDmode)
+         || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
+       {
+         rtx tmp = ix86_compare_op0;
+         ix86_compare_op0 = ix86_compare_op1;
+         ix86_compare_op1 = tmp;
+         operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
+                                       VOIDmode, ix86_compare_op0,
+                                       ix86_compare_op1);
+       }
+      /* Similarly try to manage result to be first operand of conditional
+        move.  Exchanging the two move arms is compensated for by
+        reversing the condition below.  */
+      if (rtx_equal_p (operands[0], operands[3]))
+       {
+         rtx tmp = operands[2];
+         operands[2] = operands[3];
+         /* Complete the exchange: the saved arm goes into operands[3].  */
+         operands[3] = tmp;
+         operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
+                                         (GET_CODE (operands[1])),
+                                       VOIDmode, ix86_compare_op0,
+                                       ix86_compare_op1);
+       }
+      if (GET_MODE (operands[0]) == SFmode)
+       emit_insn (gen_sse_movsfcc (operands[0], operands[1],
+                                   operands[2], operands[3],
+                                   ix86_compare_op0, ix86_compare_op1));
+      else
+       emit_insn (gen_sse_movdfcc (operands[0], operands[1],
+                                   operands[2], operands[3],
+                                   ix86_compare_op0, ix86_compare_op1));
+      return 1;
+    }
+
   /* The floating point conditional move instructions don't directly
      support conditions resulting from a signed integer comparison.  */
 
index d6a751e..e6257a7 100644 (file)
              (const_string "lea")))
    (set_attr "mode" "SI")])
 
+;; Placeholder for the conditional moves.  This one is split either into the
+;; SSE based move emulation or into the usual cmove sequence.  A slightly
+;; unfortunate fact is that the compares supported by the cmp??ss instructions
+;; are exactly the swapped forms of those supported by the cmove sequence.
+
+;; SFmode conditional move placeholder.  The output template is "#", so the
+;; insn is never printed as is and has to be split.  It is available only
+;; with TARGET_SSE and only when operands 2 and 3 are not both MEMs.
+;; Operand 1 is the comparison of operands 4 and 5; operands 2 and 3 are the
+;; two move arms.  Besides the result, the pattern clobbers a scratch
+;; (operand 6) and (reg:CC 17).
+(define_insn "sse_movsfcc"
+  [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf")
+       (if_then_else:SF (match_operator 1 "sse_comparison_operator"
+                       [(match_operand:SF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f")
+                        (match_operand:SF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")])
+                     (match_operand:SF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx")
+                     (match_operand:SF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx")))
+   (clobber (match_scratch:SF 6 "=2,&5,X,X,X,X,X,X,X,X"))
+   (clobber (reg:CC 17))]
+  "TARGET_SSE
+   && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+  "#")
+
+;; DFmode conditional move placeholder.  The output template is "#", so the
+;; insn is never printed as is and has to be split.  It is available only
+;; with TARGET_SSE2 and only when operands 2 and 3 are not both MEMs.
+;; Operand 1 is the comparison of operands 4 and 5; operands 2 and 3 are the
+;; two move arms.  Besides the result, the pattern clobbers a scratch
+;; (operand 6) and (reg:CC 17).
+(define_insn "sse_movdfcc"
+  [(set (match_operand:DF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf")
+       (if_then_else:DF (match_operator 1 "sse_comparison_operator"
+                       [(match_operand:DF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f")
+                        (match_operand:DF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")])
+                     (match_operand:DF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx")
+                     (match_operand:DF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx")))
+   (clobber (match_scratch:DF 6 "=2,&5,X,X,X,X,X,X,X,X"))
+   (clobber (reg:CC 17))]
+  "TARGET_SSE2
+   && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
+  "#")
+
+;; For non-sse moves just expand the usual cmove sequence.
+;; This split applies after reload when the destination is not an SSE
+;; register and its mode satisfies VALID_SSE_REG_MODE.  The comparison
+;; operands are stored into ix86_compare_op0/ix86_compare_op1 in exchanged
+;; order (operand 5 first, operand 4 second), operand 1 is rebuilt with
+;; swap_condition to match that order, and the expansion itself is done
+;; by ix86_expand_fp_movcc.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+       (if_then_else (match_operator 1 "comparison_operator"
+                       [(match_operand 4 "nonimmediate_operand" "")
+                        (match_operand 5 "register_operand" "")])
+                     (match_operand 2 "nonimmediate_operand" "")
+                     (match_operand 3 "nonimmediate_operand" "")))
+   (clobber (match_operand 6 "" ""))
+   (clobber (reg:CC 17))]
+  "!SSE_REG_P (operands[0]) && reload_completed
+   && VALID_SSE_REG_MODE (GET_MODE (operands[0]))"
+  [(const_int 0)]
+  "
+{
+   ix86_compare_op0 = operands[5];
+   ix86_compare_op1 = operands[4];
+   operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
+                                VOIDmode, operands[5], operands[4]);
+   ix86_expand_fp_movcc (operands);
+   DONE;
+}")
+
+;; Split SSE based conditional move into sequence:
+;; cmpCC op0, op4   -  set op0 to 0 or ffffffff depending on the comparison
+;; and   op2, op0   -  zero op2 if comparison was false
+;; nand  op0, op3   -  load op3 to op0 if comparison was false
+;; or   op2, op0   -  get the non-zero one into the result.
+;;
+;; The split applies after reload when the destination is an SSE register.
+;; The logical operations are emitted in TImode through subregs.  The
+;; preparation statement gives the comparison the mode of operand 0 and
+;; picks the two inputs of the final IOR (operands 6 and 7) depending on
+;; whether operand 0 and operand 4 are the same rtx.
+;;
+;; NOTE(review): the comparison result is stored into operand 4, while the
+;; AND and NAND steps read operand 0 as the mask; these agree only when
+;; operands 0 and 4 are the same register.  Likewise the matched clobber is
+;; (match_dup 2) although the insn patterns clobber scratch operand 6.
+;; Confirm the remaining alternatives are handled as intended.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+       (if_then_else (match_operator 1 "sse_comparison_operator"
+                       [(match_operand 4 "register_operand" "")
+                        (match_operand 5 "nonimmediate_operand" "")])
+                     (match_operand 2 "register_operand" "")
+                     (match_operand 3 "register_operand" "")))
+   (clobber (match_dup 2))
+   (clobber (reg:CC 17))]
+  "SSE_REG_P (operands[0]) && reload_completed"
+  [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)]))
+   (set (subreg:TI (match_dup 2) 0) (and:TI (subreg:TI (match_dup 2) 0)
+                                           (subreg:TI (match_dup 0) 0)))
+   (set (subreg:TI (match_dup 4) 0) (and:TI (not:TI (subreg:TI (match_dup 0) 0))
+                                           (subreg:TI (match_dup 3) 0)))
+   (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
+                                           (subreg:TI (match_dup 7) 0)))]
+  "
+{
+  PUT_MODE (operands[1], GET_MODE (operands[0]));
+  if (rtx_equal_p (operands[0], operands[4]))
+    operands[6] = operands[4], operands[7] = operands[2];
+  else
+    operands[6] = operands[2], operands[7] = operands[0];
+}")
+
+;; Special case of conditional move we can handle effectively.
+;; Do not bother with the integer/floating point case, since these are
+;; both considerably slower, unlike in the generic case.
+;; SFmode conditional move whose second arm (operand 3) is zero.  Operand 4
+;; of the comparison is tied to the destination; operand 5 may be an SSE
+;; register or memory.  The "#" template means the insn has to be split.
+(define_insn "*sse_movsfcc_const0_1"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (if_then_else:SF (match_operator 1 "sse_comparison_operator"
+                       [(match_operand:SF 4 "register_operand" "0")
+                        (match_operand:SF 5 "nonimmediate_operand" "xm")])
+                     (match_operand:SF 2 "register_operand" "x")
+                     (match_operand:SF 3 "const0_operand" "X")))]
+  "TARGET_SSE"
+  "#")
+
+;; SFmode conditional move whose first arm (operand 2) is zero.  Operand 4
+;; of the comparison is tied to the destination; operand 5 may be an SSE
+;; register or memory.  The zero constant takes the "X" constraint since it
+;; is never placed in a register, while the register arm (operand 3) needs
+;; an SSE register.  The "#" template means the insn has to be split.
+(define_insn "*sse_movsfcc_const0_2"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (if_then_else:SF (match_operator 1 "sse_comparison_operator"
+                       [(match_operand:SF 4 "register_operand" "0")
+                        (match_operand:SF 5 "nonimmediate_operand" "xm")])
+                     (match_operand:SF 2 "const0_operand" "X")
+                     (match_operand:SF 3 "register_operand" "x")))]
+  "TARGET_SSE"
+  "#")
+
+;; SFmode conditional move whose second arm (operand 3) is zero, for
+;; conditions accepted by fcmov_comparison_operator.  Here operand 5 of the
+;; comparison is tied to the destination and operand 4 may be an SSE
+;; register or memory.  The "#" template means the insn has to be split.
+(define_insn "*sse_movsfcc_const0_3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+                       [(match_operand:SF 4 "nonimmediate_operand" "xm")
+                        (match_operand:SF 5 "register_operand" "0")])
+                     (match_operand:SF 2 "register_operand" "x")
+                     (match_operand:SF 3 "const0_operand" "X")))]
+  "TARGET_SSE"
+  "#")
+
+;; SFmode conditional move whose first arm (operand 2) is zero, for
+;; conditions accepted by fcmov_comparison_operator.  Operand 5 of the
+;; comparison is tied to the destination and operand 4 may be an SSE
+;; register or memory.  The zero constant takes the "X" constraint since it
+;; is never placed in a register, while the register arm (operand 3) needs
+;; an SSE register.  The "#" template means the insn has to be split.
+(define_insn "*sse_movsfcc_const0_4"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+                       [(match_operand:SF 4 "nonimmediate_operand" "xm")
+                        (match_operand:SF 5 "register_operand" "0")])
+                     (match_operand:SF 2 "const0_operand" "X")
+                     (match_operand:SF 3 "register_operand" "x")))]
+  "TARGET_SSE"
+  "#")
+
+;; DFmode conditional move whose second arm (operand 3) is zero.  Operand 4
+;; of the comparison is tied to the destination; operand 5 may be an SSE
+;; register or memory.  All operands are DFmode, matching the pattern name
+;; and the TARGET_SSE2 condition.  The "#" template means the insn has to
+;; be split.
+(define_insn "*sse_movdfcc_const0_1"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (if_then_else:DF (match_operator 1 "sse_comparison_operator"
+                       [(match_operand:DF 4 "register_operand" "0")
+                        (match_operand:DF 5 "nonimmediate_operand" "xm")])
+                     (match_operand:DF 2 "register_operand" "x")
+                     (match_operand:DF 3 "const0_operand" "X")))]
+  "TARGET_SSE2"
+  "#")
+
+;; DFmode conditional move whose first arm (operand 2) is zero.  Operand 4
+;; of the comparison is tied to the destination; operand 5 may be an SSE
+;; register or memory.  All operands are DFmode, matching the pattern name
+;; and the TARGET_SSE2 condition.  The zero constant takes the "X"
+;; constraint since it is never placed in a register, while the register
+;; arm (operand 3) needs an SSE register.  The "#" template means the insn
+;; has to be split.
+(define_insn "*sse_movdfcc_const0_2"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (if_then_else:DF (match_operator 1 "sse_comparison_operator"
+                       [(match_operand:DF 4 "register_operand" "0")
+                        (match_operand:DF 5 "nonimmediate_operand" "xm")])
+                     (match_operand:DF 2 "const0_operand" "X")
+                     (match_operand:DF 3 "register_operand" "x")))]
+  "TARGET_SSE2"
+  "#")
+
+;; DFmode conditional move whose second arm (operand 3) is zero, for
+;; conditions accepted by fcmov_comparison_operator.  Operand 5 of the
+;; comparison is tied to the destination and operand 4 may be an SSE
+;; register or memory.  All operands are DFmode, matching the pattern name
+;; and the TARGET_SSE2 condition.  The "#" template means the insn has to
+;; be split.
+(define_insn "*sse_movdfcc_const0_3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+                       [(match_operand:DF 4 "nonimmediate_operand" "xm")
+                        (match_operand:DF 5 "register_operand" "0")])
+                     (match_operand:DF 2 "register_operand" "x")
+                     (match_operand:DF 3 "const0_operand" "X")))]
+  "TARGET_SSE2"
+  "#")
+
+;; DFmode conditional move whose first arm (operand 2) is zero, for
+;; conditions accepted by fcmov_comparison_operator.  Operand 5 of the
+;; comparison is tied to the destination and operand 4 may be an SSE
+;; register or memory.  All operands are DFmode, matching the pattern name
+;; and the TARGET_SSE2 condition.  The zero constant takes the "X"
+;; constraint since it is never placed in a register, while the register
+;; arm (operand 3) needs an SSE register.  The "#" template means the insn
+;; has to be split.
+(define_insn "*sse_movdfcc_const0_4"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+                       [(match_operand:DF 4 "nonimmediate_operand" "xm")
+                        (match_operand:DF 5 "register_operand" "0")])
+                     (match_operand:DF 2 "const0_operand" "X")
+                     (match_operand:DF 3 "register_operand" "x")))]
+  "TARGET_SSE2"
+  "#")
+
+;; Split the conditional moves that have a zero arm.  The split applies
+;; after reload when the destination is an SSE register and operand 2 or
+;; operand 3 satisfies const0_operand.  Two insns are emitted: the
+;; comparison, whose result is written to operand 0, and a TImode AND of
+;; that result (or of its complement) with the non-zero arm.
+;;
+;; The preparation statement gives the comparison the mode of operand 0.
+;; If the condition is not an sse_comparison_operator, the comparison
+;; operands are exchanged and the condition is replaced by its
+;; swap_condition form.  When operand 2 is the zero, operand 3 is ANDed
+;; with the complement of the comparison result; otherwise operand 2 is
+;; ANDed with the result itself.
+;;
+;; NOTE(review): the emitted comparison uses operand 0 as its first input
+;; instead of operand 4; this presumably relies on the tie to the
+;; destination ("0" constraint) in the insn patterns -- confirm.
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+       (if_then_else (match_operator 1 "comparison_operator"
+                       [(match_operand 4 "register_operand" "")
+                        (match_operand 5 "nonimmediate_operand" "")])
+                     (match_operand 2 "nonmemory_operand" "")
+                     (match_operand 3 "nonmemory_operand" "")))]
+  "SSE_REG_P (operands[0]) && reload_completed
+   && (const0_operand (operands[2], GET_MODE (operands[0]))
+       || const0_operand (operands[3], GET_MODE (operands[0])))"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
+   (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
+                                           (subreg:TI (match_dup 7) 0)))]
+  "
+{
+  PUT_MODE (operands[1], GET_MODE (operands[0]));
+  if (!sse_comparison_operator (operands[1], VOIDmode))
+    {
+      rtx tmp = operands[5];
+      operands[5] = operands[4];
+      operands[4] = tmp;
+      PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
+    }
+  if (const0_operand (operands[2], GET_MODE (operands[0])))
+    {
+      operands[7] = operands[3];
+      operands[6] = gen_rtx_NOT (TImode, gen_rtx_SUBREG (TImode, operands[0],
+                                                        0));
+    }
+  else
+    {
+      operands[7] = operands[2];
+      operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0);
+    }
+}")
+
 (define_insn "allocate_stack_worker"
   [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)
    (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0)))