This enables optimization of the following pattern:
(set (reg:V2DF 87 [ xx ])
(vec_concat:V2DF (vec_select:DF (reg:V4DF 92)
(parallel [
(const_int 2 [0x2])
]))
(vec_select:DF (reg:V4DF 92)
(parallel [
(const_int 3 [0x3])
]))))
gcc/ChangeLog:
* simplify-rtx.c
(simplify_context::simplify_binary_operation_1): Relax the
condition for simplifying (vec_concat:M (vec_select op0
index0) (vec_select op1 index1)) so that op0 and M may have
different modes, provided they have the same inner mode.
gcc/testsuite/ChangeLog:
* gcc.target/i386/vect-rebuild.c: Adjust testcases.
* gcc.target/i386/avx512f-vect-rebuild.c: New test.
if (GET_CODE (trueop0) == VEC_SELECT
&& GET_CODE (trueop1) == VEC_SELECT
&& rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop1, 0))
- && GET_MODE (XEXP (trueop0, 0)) == mode)
+ && GET_MODE_INNER (GET_MODE (XEXP (trueop0, 0)))
+ == GET_MODE_INNER(mode))
{
rtx par0 = XEXP (trueop0, 1);
rtx par1 = XEXP (trueop1, 1);
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O -mavx512vl -mavx512dq -fno-tree-forwprop" } */
+
+typedef double v2df __attribute__ ((__vector_size__ (16)));
+typedef double v4df __attribute__ ((__vector_size__ (32)));
+
+v2df h (v4df x)
+{
+ v2df xx = { x[2], x[3] };
+ return xx;
+}
+
+v4df f2 (v4df x)
+{
+ v4df xx = { x[0], x[1], x[2], x[3] };
+ return xx;
+}
+
+/* { dg-final { scan-assembler-not "unpck" } } */
+/* { dg-final { scan-assembler-not "valign" } } */
+/* { dg-final { scan-assembler-times "\tv?extract(?:f128|f64x2)\[ \t\]" 1 } } */
/* { dg-final { scan-assembler-not "unpck" } } */
/* { dg-final { scan-assembler-times "\tv?permilpd\[ \t\]" 1 } } */
-/* { dg-final { scan-assembler-times "\tv?extractf128\[ \t\]" 1 } } */
+/* { dg-final { scan-assembler-times "\tv?extract(?:f128|f64x2)\[ \t\]" 1 } } */