; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
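; The CHECK-STD prefix covers the default IEEE-safe lowering, in which the
; source order of the floating-point operations must be preserved; CHECK-UNSAFE
; covers -enable-unsafe-fp-math, in which the machine combiner may reassociate
; fadd/fmul chains to shorten the critical path.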
; Incremental updates of the instruction depths should be enough for this test
; case.
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
; CHECK-STD-LABEL: reassociate_muls1:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fmul s1, s2, s0
-; CHECK-STD-NEXT: fmul s0, s3, s1
+; CHECK-STD-NEXT: fmul s0, s2, s0
+; CHECK-STD-NEXT: fmul s0, s3, s0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls1:
; CHECK-STD-LABEL: reassociate_muls_double:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fmul d1, d2, d0
-; CHECK-STD-NEXT: fmul d0, d3, d1
+; CHECK-STD-NEXT: fmul d0, d2, d0
+; CHECK-STD-NEXT: fmul d0, d3, d0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_double:
ret double %t2
}
+; Verify that scalar half-precision adds are reassociated.
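+; With the standard semantics the adds stay serialized behind the fdiv result;
+; with unsafe-fp-math the two inputs that do not depend on the divide (h2, h3)
+; are added first, so that add can execute in parallel with the fdiv.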
+
+define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-STD-LABEL: reassociate_adds_half:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fdiv h0, h0, h1
+; CHECK-STD-NEXT: fadd h0, h2, h0
+; CHECK-STD-NEXT: fadd h0, h3, h0
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_adds_half:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
+; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
+; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fdiv half %x0, %x1
+ %t1 = fadd half %x2, %t0
+ %t2 = fadd half %x3, %t1
+ ret half %t2
+}
+
+; Verify that scalar half-precision multiplies are reassociated.
+
+define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-STD-LABEL: reassociate_muls_half:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fdiv h0, h0, h1
+; CHECK-STD-NEXT: fmul h0, h2, h0
+; CHECK-STD-NEXT: fmul h0, h3, h0
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_muls_half:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
+; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
+; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fdiv half %x0, %x1
+ %t1 = fmul half %x2, %t0
+ %t2 = fmul half %x3, %t1
+ ret half %t2
+}
+
; Verify that scalar integer adds are reassociated.
define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
+
+; Verify that 64-bit vector half-precision adds are reassociated.
+
+define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
+; CHECK-STD-LABEL: reassociate_adds_v4f16:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h
+; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h
+; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fadd <4 x half> %x0, %x1
+ %t1 = fadd <4 x half> %x2, %t0
+ %t2 = fadd <4 x half> %x3, %t1
+ ret <4 x half> %t2
+}
+
+; Verify that 128-bit vector half-precision multiplies are reassociated.
+
+define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
+; CHECK-STD-LABEL: reassociate_muls_v8f16:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h
+; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h
+; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fadd <8 x half> %x0, %x1
+ %t1 = fmul <8 x half> %x2, %t0
+ %t2 = fmul <8 x half> %x3, %t1
+ ret <8 x half> %t2
+}
+
; Verify that 128-bit vector single-precision multiplies are reassociated.
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-UNSAFE-NEXT: fmov d10, d0
; CHECK-UNSAFE-NEXT: bl bar
; CHECK-UNSAFE-NEXT: fadd d1, d8, d9
-; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
-; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT: ret
; CHECK-NEXT: fmov d10, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: fadd d1, d8, d9
-; CHECK-NEXT: fadd d0, d10, d0
-; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
; CHECK-NEXT: fadd d0, d1, d0
; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret