Fix the scheduling costs of `add` and `orr` with an immediate operand in the Exynos M3 and M4 models.
SchedVar<ExynosLogicPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosArithPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
SchedVar<ExynosLogicPred, [M3WriteA1]>,
SchedVar<ExynosArithPred, [M4WriteA1]>,
SchedVar<ExynosLogicExPred, [M4WriteA1]>,
SchedVar<NoSchedPred, [M4WriteAA]>]>;
-def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
- SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
+ SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
def M4WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M4WriteA1]>,
SchedVar<ExynosLogicExPred, [M4WriteA1]>,
SchedVar<NoSchedPred, [M4WriteAA]>]>;
MCReturnStatement<ExynosCheckShift>>,
MCOpcodeSwitchCase<
IsArithUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ IsArithImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
MCReturnStatement<ExynosCheckShift>>,
MCOpcodeSwitchCase<
IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ IsLogicImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
CheckShiftBy8]>]>>>,
MCOpcodeSwitchCase<
IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ IsLogicImmOp.ValidOpcodes,
MCReturnStatement<TruePred>>],
MCReturnStatement<FalsePred>>>;
def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3
+# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4
+
+ mov x0, x1
+ mov sp, x0
+
+ mov w0, #0x3210
+ add w0, w1, #0
+
+ adr x0, 1f
+ ldr x0, [x0]
+
+ adrp x0, 1f
+ add x0, x0, :lo12:1f
+
+ fmov s0, s1
+
+ movi d0, #0
+
+1:
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 1000
+
+# M3-NEXT: Total Cycles: 172
+# M4-NEXT: Total Cycles: 172
+
+# ALL-NEXT: Total uOps: 1000
+
+# M3: Dispatch Width: 6
+# M3-NEXT: uOps Per Cycle: 5.81
+# M3-NEXT: IPC: 5.81
+# M3-NEXT: Block RThroughput: 1.7
+
+# M4: Dispatch Width: 6
+# M4-NEXT: uOps Per Cycle: 5.81
+# M4-NEXT: IPC: 5.81
+# M4-NEXT: Block RThroughput: 1.7
+
+# ALL: Instruction Info:
+# ALL-NEXT: [1]: #uOps
+# ALL-NEXT: [2]: Latency
+# ALL-NEXT: [3]: RThroughput
+# ALL-NEXT: [4]: MayLoad
+# ALL-NEXT: [5]: MayStore
+# ALL-NEXT: [6]: HasSideEffects (U)
+
+# ALL: [1] [2] [3] [4] [5] [6] Instructions:
+
+# M3-NEXT: 1 0 0.17 mov x0, x1
+# M3-NEXT: 1 0 0.17 mov sp, x0
+# M3-NEXT: 1 0 0.17 mov w0, #12816
+# M3-NEXT: 1 1 0.25 add w0, w1, #0
+# M3-NEXT: 1 0 0.17 adr x0, .Ltmp0
+# M3-NEXT: 1 4 0.50 * ldr x0, [x0]
+# M3-NEXT: 1 0 0.17 adrp x0, .Ltmp0
+# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:.Ltmp0
+# M3-NEXT: 1 1 0.33 fmov s0, s1
+# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000
+
+# M4-NEXT: 1 0 0.17 mov x0, x1
+# M4-NEXT: 1 0 0.17 mov sp, x0
+# M4-NEXT: 1 0 0.17 mov w0, #12816
+# M4-NEXT: 1 1 0.25 add w0, w1, #0
+# M4-NEXT: 1 0 0.17 adr x0, .Ltmp0
+# M4-NEXT: 1 4 0.50 * ldr x0, [x0]
+# M4-NEXT: 1 0 0.17 adrp x0, .Ltmp0
+# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:.Ltmp0
+# M4-NEXT: 1 1 0.33 fmov s0, s1
+# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000