re PR target/53712 (Does not combine unaligned load with _mm_cmpistri, redundant...
author Uros Bizjak <ubizjak@gmail.com>
Mon, 18 Jun 2012 18:41:25 +0000 (20:41 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Mon, 18 Jun 2012 18:41:25 +0000 (20:41 +0200)
PR target/53712
* config/i386/sse.md (*sse4_2_pcmpestr_unaligned): New.
(*sse4_2_pcmpistr_unaligned): New.

testsuite/ChangeLog:

PR target/53712
* gcc.target/i386/pr53712.c: New test.

From-SVN: r188753

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr53712.c [new file with mode: 0644]

index 26adde90bf54404d8148de3f7249df19e2e9377b..5deedb6ef5059890d49efd67d844cc797de475c5 100644 (file)
@@ -1,3 +1,9 @@
+2012-06-18  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/53712
+       * config/i386/sse.md (*sse4_2_pcmpestr_unaligned): New.
+       (*sse4_2_pcmpistr_unaligned): New.
+
 2012-06-18  Ian Bolton  <ian.bolton@arm.com>
            Sameera Deshpande  <sameera.deshpande@arm.com>
            Greta Yorsh  <greta.yorsh@arm.com>
index 654969fa52a8201ef65d4ddcaceda275fa4fff0e..38ade496f171078cac026d6cc09085bcbe9c0687 100644 (file)
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
 
+;; Combined form of the explicit-length string compare (UNSPEC_PCMPESTR)
+;; whose second vector operand (operand 4) is a memory reference wrapped
+;; in UNSPEC_MOVU.  The pattern sets three results in parallel: the SI
+;; value in operand 0, the V16QI value in operand 1 and FLAGS_REG.  It is
+;; only matched while pseudos can still be created, has no assembly
+;; template of its own ("#") and is always split by the C code below.
+(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
+  [(set (match_operand:SI 0 "register_operand" "=c")
+       (unspec:SI
+         [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x")
+          (match_operand:SI 3 "register_operand" "a")
+          (unspec:V16QI
+            [(match_operand:V16QI 4 "memory_operand" "m")]
+            UNSPEC_MOVU)
+          (match_operand:SI 5 "register_operand" "d")
+          (match_operand:SI 6 "const_0_to_255_operand" "n")]
+         UNSPEC_PCMPESTR))
+   (set (match_operand:V16QI 1 "register_operand" "=Yz")
+       (unspec:V16QI
+         [(match_dup 2)
+          (match_dup 3)
+          (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+          (match_dup 5)
+          (match_dup 6)]
+         UNSPEC_PCMPESTR))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC
+         [(match_dup 2)
+          (match_dup 3)
+          (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+          (match_dup 5)
+          (match_dup 6)]
+         UNSPEC_PCMPESTR))]
+  "TARGET_SSE4_2
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  /* Each flag is nonzero when CURR_INSN carries no REG_UNUSED note for
+     the corresponding result (operand 0, operand 1, FLAGS_REG).  */
+  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+  /* Emit one insn per register result that is not marked unused.  The
+     memory operand (operands[4]) is handed to the generators directly,
+     without the UNSPEC_MOVU wrapper used in the pattern above.  */
+  if (ecx)
+    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
+                                    operands[3], operands[4],
+                                    operands[5], operands[6]));
+  if (xmm0)
+    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
+                                    operands[3], operands[4],
+                                    operands[5], operands[6]));
+  /* Only FLAGS_REG is not marked unused: use the _cconly generator,
+     passing NULL for both register results.  */
+  if (flags && !(ecx || xmm0))
+    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
+                                          operands[2], operands[3],
+                                          operands[4], operands[5],
+                                          operands[6]));
+  /* All three results are marked unused: emit only a deleted-insn note.  */
+  if (!(flags || ecx || xmm0))
+    emit_note (NOTE_INSN_DELETED);
+
+  DONE;
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "load")
+   (set_attr "mode" "TI")])
+
 (define_insn "sse4_2_pcmpestri"
   [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
 
+;; Combined form of the implicit-length string compare (UNSPEC_PCMPISTR)
+;; whose second vector operand (operand 3) is a memory reference wrapped
+;; in UNSPEC_MOVU.  The pattern sets three results in parallel: the SI
+;; value in operand 0, the V16QI value in operand 1 and FLAGS_REG.  It is
+;; only matched while pseudos can still be created, has no assembly
+;; template of its own ("#") and is always split by the C code below.
+(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
+  [(set (match_operand:SI 0 "register_operand" "=c")
+       (unspec:SI
+         [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x")
+          (unspec:V16QI
+            [(match_operand:V16QI 3 "memory_operand" "m")]
+            UNSPEC_MOVU)
+          (match_operand:SI 4 "const_0_to_255_operand" "n")]
+         UNSPEC_PCMPISTR))
+   (set (match_operand:V16QI 1 "register_operand" "=Yz")
+       (unspec:V16QI
+         [(match_dup 2)
+          (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+          (match_dup 4)]
+         UNSPEC_PCMPISTR))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC
+         [(match_dup 2)
+          (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+          (match_dup 4)]
+         UNSPEC_PCMPISTR))]
+  "TARGET_SSE4_2
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  /* Each flag is nonzero when CURR_INSN carries no REG_UNUSED note for
+     the corresponding result (operand 0, operand 1, FLAGS_REG).  */
+  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+  /* Emit one insn per register result that is not marked unused.  The
+     memory operand (operands[3]) is handed to the generators directly,
+     without the UNSPEC_MOVU wrapper used in the pattern above.  */
+  if (ecx)
+    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
+                                    operands[3], operands[4]));
+  if (xmm0)
+    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
+                                    operands[3], operands[4]));
+  /* Only FLAGS_REG is not marked unused: use the _cconly generator,
+     passing NULL for both register results.  */
+  if (flags && !(ecx || xmm0))
+    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
+                                          operands[2], operands[3],
+                                          operands[4]));
+  /* All three results are marked unused: emit only a deleted-insn note.  */
+  if (!(flags || ecx || xmm0))
+    emit_note (NOTE_INSN_DELETED);
+
+  DONE;
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "load")
+   (set_attr "mode" "TI")])
+
 (define_insn "sse4_2_pcmpistri"
   [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
index 6dc143e64ebea61404077f0d10f914d806ebab98..1deb816228ece92caaf1660389454ca08f5a36d3 100644 (file)
@@ -1,3 +1,8 @@
+2012-06-18  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/53712
+       * gcc.target/i386/pr53712.c: New test.
+
 2012-06-18  Tobias Burnus  <burnus@net-b.de>
 
        PR fortran/53692
diff --git a/gcc/testsuite/gcc.target/i386/pr53712.c b/gcc/testsuite/gcc.target/i386/pr53712.c
new file mode 100644 (file)
index 0000000..5c47e20
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.2" } */
+
+typedef char v16qi __attribute__ ((__vector_size__ (16)));
+
+/* PR target/53712: load 16 bytes from each of S1 and S2 with unaligned
+   loads and compare them with pcmpistri (immediate control byte 0).
+   The scan below expects exactly one movdqu in the generated assembly,
+   so only one of the two loads may remain a separate instruction.  */
+int test (const char *s1, const char *s2)
+{
+  v16qi s1chars = __builtin_ia32_loaddqu ((const char *) s1);
+  v16qi s2chars = __builtin_ia32_loaddqu ((const char *) s2);
+  return __builtin_ia32_pcmpistri128 (s1chars, s2chars, 0);
+}
+
+/* { dg-final { scan-assembler-times "movdqu" 1 } } */