From f9af4ccb8a1c68a8bb8f7abc98e9ea37b0b68538 Mon Sep 17 00:00:00 2001 From: Evandro Menezes Date: Wed, 30 Oct 2019 16:56:21 -0500 Subject: [PATCH] [AArch64] Update for Exynos Fix the costs of `add` and `orr` with an immediate operand. --- llvm/lib/Target/AArch64/AArch64SchedExynosM3.td | 2 + llvm/lib/Target/AArch64/AArch64SchedExynosM4.td | 6 +- llvm/lib/Target/AArch64/AArch64SchedPredExynos.td | 9 +++ .../llvm-mca/AArch64/Exynos/zero-latency-move.s | 71 ++++++++++++++++++++++ 4 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index c9d29d7..569bdb5 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -130,8 +130,10 @@ def M3WriteAU : SchedWriteVariant<[SchedVar, SchedVar, SchedVar]>; def M3WriteAV : SchedWriteVariant<[SchedVar, + SchedVar, SchedVar]>; def M3WriteAW : SchedWriteVariant<[SchedVar, + SchedVar, SchedVar]>; def M3WriteAX : SchedWriteVariant<[SchedVar, SchedVar, diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index c8bf05f..447cdee 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -156,8 +156,10 @@ def M4WriteAU : SchedWriteVariant<[SchedVar, SchedVar, SchedVar, SchedVar]>; -def M4WriteAV : SchedWriteVariant<[SchedVar, - SchedVar]>; +def M4WriteAV : SchedWriteVariant<[SchedVar, + SchedVar, + SchedVar, + SchedVar]>; def M4WriteAX : SchedWriteVariant<[SchedVar, SchedVar, SchedVar]>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td index 0c1d82d..1a729dd 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td +++ b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td @@ -50,6 +50,9 @@ def ExynosArithFn : TIIPredicate< MCReturnStatement>, MCOpcodeSwitchCase< IsArithUnshiftOp.ValidOpcodes, + MCReturnStatement>, + MCOpcodeSwitchCase< + IsArithImmOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement>>; def ExynosArithPred : MCSchedPredicate; @@ -63,6 +66,9 @@ def ExynosLogicFn : TIIPredicate< MCReturnStatement>, MCOpcodeSwitchCase< IsLogicUnshiftOp.ValidOpcodes, + MCReturnStatement>, + MCOpcodeSwitchCase< + IsLogicImmOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement>>; def ExynosLogicPred : MCSchedPredicate; @@ -81,6 +87,9 @@ def ExynosLogicExFn : TIIPredicate< CheckShiftBy8]>]>>>, MCOpcodeSwitchCase< IsLogicUnshiftOp.ValidOpcodes, + MCReturnStatement>, + MCOpcodeSwitchCase< + IsLogicImmOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement>>; def ExynosLogicExPred : MCSchedPredicate; diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s new file mode 100644 index 0000000..edef9c9 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 + + mov x0, x1 + mov sp, x0 + + mov w0, #0x3210 + add w0, w1, #0 + + adr x0, 1f + ldr x0, [x0] + + adrp x0, 1f + add x0, x0, :lo12:1f + + fmov s0, s1 + + movi d0, #0 + +1: + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# M3-NEXT: Total Cycles: 172 +# M4-NEXT: Total Cycles: 172 + +# ALL-NEXT: Total uOps: 1000 + +# M3: Dispatch Width: 6 +# M3-NEXT: uOps Per Cycle: 5.81 +# M3-NEXT: IPC: 5.81 +# M3-NEXT: Block RThroughput: 1.7 + +# M4: Dispatch Width: 6 +# M4-NEXT: uOps Per Cycle: 5.81 +# M4-NEXT: IPC: 5.81 +# M4-NEXT: Block RThroughput: 1.7 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: + +# M3-NEXT: 1 0 0.17 mov x0, x1 +# M3-NEXT: 1 0 0.17 mov sp, x0 +# M3-NEXT: 1 0 0.17 mov w0, #12816 +# M3-NEXT: 1 1 0.25 add w0, w1, #0 +# M3-NEXT: 1 0 0.17 adr x0, .Ltmp0 +# M3-NEXT: 1 4 0.50 * ldr x0, [x0] +# M3-NEXT: 1 0 0.17 adrp x0, .Ltmp0 +# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:.Ltmp0 +# M3-NEXT: 1 1 0.33 fmov s0, s1 +# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000 + +# M4-NEXT: 1 0 0.17 mov x0, x1 +# M4-NEXT: 1 0 0.17 mov sp, x0 +# M4-NEXT: 1 0 0.17 mov w0, #12816 +# M4-NEXT: 1 1 0.25 add w0, w1, #0 +# M4-NEXT: 1 0 0.17 adr x0, .Ltmp0 +# M4-NEXT: 1 4 0.50 * ldr x0, [x0] +# M4-NEXT: 1 0 0.17 adrp x0, .Ltmp0 +# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:.Ltmp0 +# M4-NEXT: 1 1 0.33 fmov s0, s1 +# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000 -- 2.7.4