aarch64: Add combine patterns for right shift and narrow
author Tamar Christina <tamar.christina@arm.com>
Wed, 20 Oct 2021 16:06:31 +0000 (17:06 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Wed, 20 Oct 2021 16:06:31 +0000 (17:06 +0100)
commit e33aef11e145996fc550eca07e899f0c756d3802
tree 2148528ad5d2d40d3b653ccbaf1e4f34b129dbc9
parent d98626bf451dea6a28a42d953f7d0bd7659ad4d5
aarch64: Add combine patterns for right shift and narrow

This adds a simple pattern for combining right shifts and narrows into
shifted narrows.

e.g.

typedef short int16_t;
typedef unsigned short uint16_t;

void foo (uint16_t * restrict a, int16_t * restrict d, int n)
{
    for( int i = 0; i < n; i++ )
      d[i] = (a[i] * a[i]) >> 10;
}

now generates:

.L4:
        ldr     q0, [x0, x3]
        umull   v1.4s, v0.4h, v0.4h
        umull2  v0.4s, v0.8h, v0.8h
        shrn    v1.4h, v1.4s, 10
        shrn2   v1.8h, v0.4s, 10
        str     q1, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

instead of:

.L4:
        ldr     q0, [x0, x3]
        umull   v1.4s, v0.4h, v0.4h
        umull2  v0.4s, v0.8h, v0.8h
        sshr    v1.4s, v1.4s, 10
        sshr    v0.4s, v0.4s, 10
        xtn     v1.4h, v1.4s
        xtn2    v1.8h, v0.4s
        str     q1, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

Thanks,
Tamar

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (*aarch64_<srn_op>shrn<mode>_vect,
*aarch64_<srn_op>shrn<mode>2_vect_le,
*aarch64_<srn_op>shrn<mode>2_vect_be): New.
* config/aarch64/iterators.md (srn_op): New.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/shrn-combine-1.c: New test.
* gcc.target/aarch64/shrn-combine-2.c: New test.
* gcc.target/aarch64/shrn-combine-3.c: New test.
* gcc.target/aarch64/shrn-combine-4.c: New test.
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/iterators.md
gcc/testsuite/gcc.target/aarch64/shrn-combine-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/shrn-combine-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/shrn-combine-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/shrn-combine-4.c [new file with mode: 0644]