Enable gather on zen3 hardware.

author Jan Hubicka <jh@suse.cz>

Wed, 17 Mar 2021 21:37:11 +0000 (22:37 +0100)

committer Jan Hubicka <jh@suse.cz>

Wed, 17 Mar 2021 21:37:11 +0000 (22:37 +0100)
author Jan Hubicka <jh@suse.cz>
Wed, 17 Mar 2021 21:37:11 +0000 (22:37 +0100)
committer Jan Hubicka <jh@suse.cz>
Wed, 17 Mar 2021 21:37:11 +0000 (22:37 +0100)
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h

index e655e66..db03738 100644 (file)
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1767,11 +1767,11 @@ struct processor_costs znver3_cost = {
    2, 2, 3,                             /* cost of moving XMM,YMM,ZMM
                                            register.  */
    6,                                   /* cost of moving SSE register to integer.  */
-  /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
-     throughput 12.  Approx 9 uops do not depend on vector size and every load
-     is 7 uops.  */
-  18, 8,                               /* Gather load static, per_elt.  */
-  18, 10,                              /* Gather store static, per_elt.  */
+  /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
+     throughput 9.  Approx 7 uops do not depend on vector size and every load
+     is 4 uops.  */
+  14, 8,                               /* Gather load static, per_elt.  */
+  14, 10,                              /* Gather store static, per_elt.  */
    32,                                  /* size of l1 cache.  */
    512,                                 /* size of l2 cache.  */
    64,                                  /* size of prefetch block.  */
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def

index 140ccb3..caebf76 100644 (file)
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -436,7 +436,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
  
  /* X86_TUNE_USE_GATHER: Use gather instructions.  */
  DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
-         ~(m_ZNVER | m_GENERIC))
+         ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC))
  
  /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
     smaller FMA chain.  */
author	Jan Hubicka <jh@suse.cz>
	Wed, 17 Mar 2021 21:37:11 +0000 (22:37 +0100)
committer	Jan Hubicka <jh@suse.cz>
	Wed, 17 Mar 2021 21:37:11 +0000 (22:37 +0100)
gcc/config/i386/x86-tune-costs.h		patch | blob | history
gcc/config/i386/x86-tune.def		patch | blob | history