static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
- int i;
-
- /* vzeroupper instructions are inserted immediately after reload to
- account for possible spills from 256bit or 512bit registers. The pass
- reuses mode switching infrastructure by re-running mode insertion
- pass, so disable entities that have already been processed. */
- for (i = 0; i < MAX_386_ENTITIES; i++)
- ix86_optimize_mode_switching[i] = 0;
+ /* Run the mode-switching-based vzeroupper insertion only when it is
+ enabled as an optimization; the pass itself may also be entered just
+ for an explicit __builtin_ia32_vzeroupper (see the gate).  */
+ if (TARGET_VZEROUPPER
+ && flag_expensive_optimizations
+ && !optimize_size)
+ {
+ /* vzeroupper instructions are inserted immediately after reload to
+ account for possible spills from 256bit or 512bit registers. The pass
+ reuses mode switching infrastructure by re-running mode insertion
+ pass, so disable entities that have already been processed. */
+ for (int i = 0; i < MAX_386_ENTITIES; i++)
+ ix86_optimize_mode_switching[i] = 0;
- ix86_optimize_mode_switching[AVX_U128] = 1;
+ ix86_optimize_mode_switching[AVX_U128] = 1;
- /* Call optimize_mode_switching. */
- g->get_passes ()->execute_pass_mode_switching ();
+ /* Call optimize_mode_switching. */
+ g->get_passes ()->execute_pass_mode_switching ();
+ }
+ /* Runs unconditionally so that register usage is also recorded on
+ vzeroupper insns emitted for the explicit built-in, even when the
+ optimization above is skipped (PR target/99563).  */
ix86_add_reg_usage_to_vzerouppers ();
return 0;
}
virtual bool gate (function *)
{
+ /* Enter the pass either when automatic vzeroupper insertion is enabled
+ as an optimization, or when the function contains an explicit
+ __builtin_ia32_vzeroupper whose insns still need their register
+ usage added by ix86_add_reg_usage_to_vzerouppers.  */
return TARGET_AVX
- && TARGET_VZEROUPPER && flag_expensive_optimizations
- && !optimize_size;
+ && ((TARGET_VZEROUPPER
+ && flag_expensive_optimizations
+ && !optimize_size)
+ || cfun->machine->has_explicit_vzeroupper);
}
virtual unsigned int execute (function *)
/* True if the function needs a stack frame. */
BOOL_BITFIELD stack_frame_required : 1;
+ /* True if __builtin_ia32_vzeroupper () has been expanded in the current
+ function.  Forces the vzeroupper insertion pass to run even when the
+ automatic vzeroupper optimization is disabled.  */
+ BOOL_BITFIELD has_explicit_vzeroupper : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
--- /dev/null
+/* PR target/99563 */
+/* { dg-do run { target avx } } */
+/* { dg-options "-O2 -mavx -mno-vzeroupper" } */
+
+#include "avx-check.h"
+#include <immintrin.h>
+
+
+/* Scalar fallback path.  noipa keeps the call out of interprocedural
+   optimizations so the branch in compute () stays a real call.  */
+__attribute__((noipa)) float
+compute_generic (void)
+{
+ return 0.0f;
+}
+
+/* AVX path: computes 256.0 / block_count and ends with an explicit
+   _mm256_zeroupper () — the built-in this test exercises, since
+   automatic insertion is off (-mno-vzeroupper in dg-options).
+   always_inline forces it into compute (), mixing the explicit
+   vzeroupper with surrounding scalar code.  */
+static inline __attribute__((always_inline))
+float compute_avx (unsigned long block_count)
+{
+ __m128d mm_res = _mm_set1_pd (256.0);
+ float res = (float) (_mm_cvtsd_f64 (mm_res) / (double) block_count);
+ _mm256_zeroupper ();
+ return res;
+}
+
+/* Dispatch: block counts >= 64 take the inlined AVX path (with the
+   explicit vzeroupper); smaller counts take the scalar fallback.  */
+__attribute__((noipa)) float
+compute (unsigned long block_count)
+{
+ if (block_count >= 64)
+ return compute_avx (block_count);
+ else
+ return compute_generic ();
+}
+
+/* compute (128) follows the AVX path: 256.0 / 128 == 2.0f (exact in
+   float).  compute (32) follows the generic path and returns 0.0f.
+   A miscompiled vzeroupper (clobbered xmm state, PR target/99563)
+   would break the first comparison.  */
+static void
+avx_test (void)
+{
+ if (compute (128) != 2.0f || compute (32) != 0.0f)
+ abort ();
+}