From 199988774d74091e467aef695d0d985528360613 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 5 Nov 2020 11:46:35 +0000 Subject: [PATCH] middle-end: optimize slp simplify back to back permutes. This optimizes sequential permutes. i.e. if there are two permutes back to back this function applies the permute of the parent to the child and removes the parent. This relies on the materialization point calculation in optimize SLP. This allows us to remove useless permutes such as ldr q0, [x0, x3] ldr q2, [x1, x3] trn1 v1.4s, v0.4s, v0.4s trn2 v0.4s, v0.4s, v0.4s trn1 v0.4s, v1.4s, v0.4s mov v1.16b, v3.16b fcmla v1.4s, v0.4s, v2.4s, #0 fcmla v1.4s, v0.4s, v2.4s, #90 str q1, [x2, x3] from the sequence the vectorizer puts out and give ldr q0, [x0, x3] ldr q2, [x1, x3] mov v1.16b, v3.16b fcmla v1.4s, v0.4s, v2.4s, #0 fcmla v1.4s, v0.4s, v2.4s, #90 str q1, [x2, x3] instead. gcc/ChangeLog: * tree-vect-slp.c (vect_slp_tree_permute_noop_p): New. (vect_optimize_slp): Optimize permutes. (vectorizable_slp_permutation): Fix typo. --- gcc/tree-vect-slp.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 420c3c9..016883a 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2941,6 +2941,18 @@ vect_optimize_slp (vec_info *vinfo) /* For loads simply drop the permutation, the load permutation already performs the desired permutation. */ ; + else if (SLP_TREE_LANE_PERMUTATION (node).exists ()) + { + /* If the node is already a permute node we just need to apply + the permutation to the permute node itself. 
*/ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "simplifying permute node %p\n", + node); + + vect_slp_permute (perms[perm], SLP_TREE_LANE_PERMUTATION (node), + true); + } else { if (dump_enabled_p ()) @@ -5152,7 +5164,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "permutation requires at " - "least three vectors"); + "least three vectors\n"); gcc_assert (!gsi); return false; } -- 2.7.4