--- /dev/null
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+**test:
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test(const uint8_t * in, uint8_t * out, int width)
+{ /* Load two uint8x16x2_t values with vld2q and store both with vst2q.
+     The second load/store is at a run-time offset (WIDTH); the expected
+     assembly above has no '!' writeback on any vld2x/vst2x.  */
+ uint8x16x2_t rg = vld2q(in);
+ uint8x16x2_t gb = vld2q(in + width);
+ vst2q (out, rg);
+ vst2q (out + width, gb);
+}
+
+/*
+**test2:
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test2(const uint8_t * in, uint8_t * out)
+{ /* Same shape as test, but the second access is at constant offset +32
+     (2 x 16 bytes, i.e. one uint8x16x2_t).  The expected assembly above
+     requires '!' writeback on the first vld21.8 and the first vst21.8,
+     with the four instruction pairs back to back.  */
+ uint8x16x2_t rg = vld2q(in);
+ uint8x16x2_t gb = vld2q(in + 32);
+ vst2q (out, rg);
+ vst2q (out + 32, gb);
+}
+
+/*
+**test3:
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test3(const uint8_t * in, uint8_t * out)
+{ /* Second vld2q/vst2q is at constant offset -32.  The expected assembly
+     above has no '!' writeback and allows other instructions between
+     the load/store pairs.  */
+ uint8x16x2_t rg = vld2q(in);
+ uint8x16x2_t gb = vld2q(in - 32);
+ vst2q (out, rg);
+ vst2q (out - 32, gb);
+}
+
+/*
+**test4:
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test4(const uint8_t * in, uint8_t * out)
+{ /* Second vld2q/vst2q is at constant offset +64 rather than the +32
+     used by test2.  The expected assembly above has no '!' writeback.  */
+ uint8x16x2_t rg = vld2q(in);
+ uint8x16x2_t gb = vld2q(in + 64);
+ vst2q (out, rg);
+ vst2q (out + 64, gb);
+}
+
+/*
+**test5:
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test5(const uint8_t * in, uint8_t * out)
+{ /* Second vld2q/vst2q is at constant offset +42, which is not a
+     multiple of the 16-byte vector size.  The expected assembly above
+     has no '!' writeback.  */
+ uint8x16x2_t rg = vld2q(in);
+ uint8x16x2_t gb = vld2q(in + 42);
+ vst2q (out, rg);
+ vst2q (out + 42, gb);
+}
+
+/*
+**test6:
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test6(const uint8_t * in, uint8_t * out, int width)
+{ /* Load two uint8x16x4_t values with vld4q and store both with vst4q.
+     The second load/store is at a run-time offset (WIDTH); the expected
+     assembly above has no '!' writeback on any vld4x/vst4x.  */
+ uint8x16x4_t rg = vld4q(in);
+ uint8x16x4_t gb = vld4q(in + width);
+ vst4q (out, rg);
+ vst4q (out + width, gb);
+}
+
+/*
+**test7:
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test7(const uint8_t * in, uint8_t * out)
+{ /* Second vld4q/vst4q is at constant offset +32 rather than the +64
+     used by test8.  The expected assembly above has no '!' writeback.  */
+ uint8x16x4_t rg = vld4q(in);
+ uint8x16x4_t gb = vld4q(in + 32);
+ vst4q (out, rg);
+ vst4q (out + 32, gb);
+}
+
+/*
+**test8:
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]!
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test8(const uint8_t * in, uint8_t * out)
+{ /* Second vld4q/vst4q is at constant offset +64 (4 x 16 bytes, i.e. one
+     uint8x16x4_t).  The expected assembly above requires '!' writeback
+     on the first vld43.8 and the first vst43.8, with the four
+     instruction groups back to back.  */
+ uint8x16x4_t rg = vld4q(in);
+ uint8x16x4_t gb = vld4q(in + 64);
+ vst4q (out, rg);
+ vst4q (out + 64, gb);
+}
+
+/*
+**test9:
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test9(const uint8_t * in, uint8_t * out)
+{ /* Second vld4q/vst4q is at constant offset -64.  The expected assembly
+     above has no '!' writeback and allows other instructions between
+     the load/store groups.  */
+ uint8x16x4_t rg = vld4q(in);
+ uint8x16x4_t gb = vld4q(in - 64);
+ vst4q (out, rg);
+ vst4q (out - 64, gb);
+}
+
+/*
+**test10:
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]
+** ...
+*/
+void
+test10(const uint8_t * in, uint8_t * out)
+{ /* Second vld4q/vst4q is at constant offset +42, which is not a
+     multiple of the 16-byte vector size.  The expected assembly above
+     has no '!' writeback.  */
+ uint8x16x4_t rg = vld4q(in);
+ uint8x16x4_t gb = vld4q(in + 42);
+ vst4q (out, rg);
+ vst4q (out + 42, gb);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
\ No newline at end of file