From 8259182e51ccf23c13d670f6f0401ce33f6c742f Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 20 Nov 2019 16:04:52 -0800 Subject: [PATCH] Revert "[AArch64] Add the pipeline model for Exynos M5" as it's causing test failures in llvm-mca. This reverts commit 9bdfee2a3bd13d405ce1592930182f23849d2897. --- llvm/lib/Target/AArch64/AArch64.td | 3 +- llvm/lib/Target/AArch64/AArch64SchedExynosM5.td | 1012 -------------------- llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s | 57 -- .../test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s | 189 ---- .../test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s | 118 --- .../test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s | 118 --- .../test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s | 118 --- .../test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s | 169 ---- .../test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s | 97 -- .../test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s | 97 -- .../test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s | 97 -- llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s | 58 -- .../tools/llvm-mca/AArch64/Exynos/direct-branch.s | 8 - .../llvm-mca/AArch64/Exynos/divide-multiply.s | 67 -- .../tools/llvm-mca/AArch64/Exynos/double-recp.s | 66 -- .../tools/llvm-mca/AArch64/Exynos/double-rsqrt.s | 78 -- .../tools/llvm-mca/AArch64/Exynos/double-sqrt.s | 79 -- .../llvm-mca/AArch64/Exynos/extended-register.s | 16 - .../AArch64/Exynos/float-divide-multiply.s | 94 -- .../tools/llvm-mca/AArch64/Exynos/float-integer.s | 114 --- .../tools/llvm-mca/AArch64/Exynos/float-load.s | 153 --- .../tools/llvm-mca/AArch64/Exynos/float-recp.s | 62 -- .../tools/llvm-mca/AArch64/Exynos/float-rsqrt.s | 72 -- .../tools/llvm-mca/AArch64/Exynos/float-sqrt.s | 73 -- .../tools/llvm-mca/AArch64/Exynos/float-store.s | 142 --- llvm/test/tools/llvm-mca/AArch64/Exynos/load.s | 66 -- .../AArch64/Exynos/scheduler-queue-usage.s | 6 - .../llvm-mca/AArch64/Exynos/shifted-register.s | 22 +- llvm/test/tools/llvm-mca/AArch64/Exynos/store.s | 82 -- .../llvm-mca/AArch64/Exynos/zero-latency-move.s | 51 +- 30 files changed, 35 insertions(+), 3349 deletions(-) delete mode 100644 llvm/lib/Target/AArch64/AArch64SchedExynosM5.td delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/load.s delete mode 100644 llvm/test/tools/llvm-mca/AArch64/Exynos/store.s diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index a2f8249..8a397d5 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -450,7 +450,6 @@ include "AArch64SchedFalkor.td" include "AArch64SchedKryo.td" include "AArch64SchedExynosM3.td" include "AArch64SchedExynosM4.td" -include "AArch64SchedExynosM5.td" include "AArch64SchedThunderX.td" include "AArch64SchedThunderX2T99.td" @@ -791,7 +790,7 @@ def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>; -def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>; +def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>; def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>; def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td deleted file mode 100644 index df74025..0000000 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ /dev/null @@ -1,1012 +0,0 @@ -//=- AArch64SchedExynosM5.td - Samsung Exynos M5 Sched Defs --*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the Samsung Exynos M5 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// The Exynos-M5 is an advanced superscalar microprocessor with a 6-wide -// in-order stage for decode and dispatch and a wider issue stage. -// The execution units and loads and stores are out-of-order. - -def ExynosM5Model : SchedMachineModel { - let IssueWidth = 6; // Up to 6 uops per cycle. - let MicroOpBufferSize = 228; // ROB size. - let LoopMicroOpBufferSize = 60; // Based on the instruction queue size. - let LoadLatency = 4; // Optimistic load cases. - let MispredictPenalty = 15; // Minimum branch misprediction penalty. - let CompleteModel = 1; // Use the default model otherwise. - - list UnsupportedFeatures = SVEUnsupported.F; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on the Exynos-M5. - -let SchedModel = ExynosM5Model in { - -def M5UnitA : ProcResource<2>; // Simple integer -def M5UnitC : ProcResource<2>; // Simple and complex integer -let Super = M5UnitC, BufferSize = 1 in -def M5UnitD : ProcResource<1>; // Integer division (inside C0, serialized) -def M5UnitE : ProcResource<2>; // Simple 32-bit integer -let Super = M5UnitC in -def M5UnitF : ProcResource<2>; // CRC (inside C) -def M5UnitB : ProcResource<1>; // Branch -def M5UnitL0 : ProcResource<1>; // Load -def M5UnitS0 : ProcResource<1>; // Store -def M5PipeLS : ProcResource<1>; // Load/Store -let Super = M5PipeLS in { - def M5UnitL1 : ProcResource<1>; - def M5UnitS1 : ProcResource<1>; -} -def M5PipeF0 : ProcResource<1>; // FP #0 -let Super = M5PipeF0 in { - def M5UnitFMAC0 : ProcResource<1>; // FP multiplication - def M5UnitFADD0 : ProcResource<1>; // Simple FP - def M5UnitNALU0 : ProcResource<1>; // Simple vector - def M5UnitNDOT0 : ProcResource<1>; // Dot product vector - def M5UnitNHAD : ProcResource<1>; // Horizontal vector - def M5UnitNMSC : ProcResource<1>; // FP and vector miscellanea - def M5UnitNMUL0 : ProcResource<1>; // Vector multiplication - def M5UnitNSHT0 : ProcResource<1>; // Vector shifting - def M5UnitNSHF0 : ProcResource<1>; // Vector shuffling - def M5UnitNCRY0 : ProcResource<1>; // Cryptographic -} -def M5PipeF1 : ProcResource<1>; // FP #1 -let Super = M5PipeF1 in { - def M5UnitFMAC1 : ProcResource<1>; // FP multiplication - def M5UnitFADD1 : ProcResource<1>; // Simple FP - def M5UnitFCVT0 : ProcResource<1>; // FP conversion - def M5UnitFDIV0 : ProcResource<2>; // FP division (serialized) - def M5UnitFSQR0 : ProcResource<2>; // FP square root (serialized) - def M5UnitFST0 : ProcResource<1>; // FP store - def M5UnitNALU1 : ProcResource<1>; // Simple vector - def M5UnitNDOT1 : ProcResource<1>; // Dot product vector - def M5UnitNSHT1 : ProcResource<1>; // Vector shifting - def M5UnitNSHF1 : ProcResource<1>; // Vector shuffling -} -def M5PipeF2 : ProcResource<1>; // FP #2 -let Super = M5PipeF2 in { - def M5UnitFMAC2 : ProcResource<1>; // FP multiplication - def M5UnitFADD2 : ProcResource<1>; // Simple FP - def M5UnitFCVT1 : ProcResource<1>; // FP conversion - def M5UnitFDIV1 : ProcResource<2>; // FP division (serialized) - def M5UnitFSQR1 : ProcResource<2>; // FP square root (serialized) - def M5UnitFST1 : ProcResource<1>; // FP store - def M5UnitNALU2 : ProcResource<1>; // Simple vector - def M5UnitNDOT2 : ProcResource<1>; // Dot product vector - def M5UnitNMUL1 : ProcResource<1>; // Vector multiplication - def M5UnitNSHT2 : ProcResource<1>; // Vector shifting - def M5UnitNCRY1 : ProcResource<1>; // Cryptographic -} - -def M5UnitAX : ProcResGroup<[M5UnitA, - M5UnitC]>; -def M5UnitAW : ProcResGroup<[M5UnitA, - M5UnitC, - M5UnitE]>; -def M5UnitL : ProcResGroup<[M5UnitL0, - M5UnitL1]>; -def M5UnitS : ProcResGroup<[M5UnitS0, - M5UnitS1]>; -def M5UnitFMAC : ProcResGroup<[M5UnitFMAC0, - M5UnitFMAC1, - M5UnitFMAC2]>; -def M5UnitFADD : ProcResGroup<[M5UnitFADD0, - M5UnitFADD1, - M5UnitFADD2]>; -def M5UnitFCVT : ProcResGroup<[M5UnitFCVT0, - M5UnitFCVT1]>; -def M5UnitFDIV : ProcResGroup<[M5UnitFDIV0, - M5UnitFDIV1]>; -def M5UnitFSQR : ProcResGroup<[M5UnitFSQR0, - M5UnitFSQR1]>; -def M5UnitFST : ProcResGroup<[M5UnitFST0, - M5UnitFST1]>; -def M5UnitNALU : ProcResGroup<[M5UnitNALU0, - M5UnitNALU1, - M5UnitNALU2]>; -def M5UnitNDOT : ProcResGroup<[M5UnitNDOT0, - M5UnitNDOT1, - M5UnitNDOT2]>; -def M5UnitNMUL : ProcResGroup<[M5UnitNMUL0, - M5UnitNMUL1]>; -def M5UnitNSHT : ProcResGroup<[M5UnitNSHT0, - M5UnitNSHT1, - M5UnitNSHT2]>; -def M5UnitNSHF : ProcResGroup<[M5UnitNSHF0, - M5UnitNSHF1]>; -def M5UnitNCRY : ProcResGroup<[M5UnitNCRY0, - M5UnitNCRY1]>; - -//===----------------------------------------------------------------------===// -// Resources details. - -def M5WriteZ0 : SchedWriteRes<[]> { let Latency = 0; } -def M5WriteZ1 : SchedWriteRes<[]> { let Latency = 1; - let NumMicroOps = 0; } -def M5WriteZ4 : SchedWriteRes<[]> { let Latency = 4; - let NumMicroOps = 0; } - -def M5WriteA1W : SchedWriteRes<[M5UnitAW]> { let Latency = 1; } -def M5WriteA1X : SchedWriteRes<[M5UnitAX]> { let Latency = 1; } -def M5WriteAAW : SchedWriteRes<[M5UnitAW]> { let Latency = 2; - let ResourceCycles = [2]; } -def M5WriteAAX : SchedWriteRes<[M5UnitAX]> { let Latency = 2; - let ResourceCycles = [2]; } -def M5WriteAB : SchedWriteRes<[M5UnitAX, - M5UnitC, - M5UnitE]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAC : SchedWriteRes<[M5UnitAX, - M5UnitAX, - M5UnitC]> { let Latency = 3; - let NumMicroOps = 3; } -def M5WriteAD : SchedWriteRes<[M5UnitAW, - M5UnitC]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAFW : SchedWriteRes<[M5UnitAW]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAFX : SchedWriteRes<[M5UnitAX]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteAUW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAUX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAVW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAVX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAXW : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAXX : SchedWriteVariant<[SchedVar, - SchedVar, - SchedVar]>; -def M5WriteAYW : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M5WriteAYX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteB1 : SchedWriteRes<[M5UnitB]> { let Latency = 1; } -def M5WriteBX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteC1 : SchedWriteRes<[M5UnitC]> { let Latency = 1; } -def M5WriteC2 : SchedWriteRes<[M5UnitC]> { let Latency = 2; } -def M5WriteCA : SchedWriteRes<[M5UnitC]> { let Latency = 3; - let ResourceCycles = [2]; } - -def M5WriteD10 : SchedWriteRes<[M5UnitD]> { let Latency = 10; - let ResourceCycles = [10]; } -def M5WriteD16 : SchedWriteRes<[M5UnitD]> { let Latency = 16; - let ResourceCycles = [16]; } - -def M5WriteF2 : SchedWriteRes<[M5UnitF]> { let Latency = 2; } - -def M5WriteL4 : SchedWriteRes<[M5UnitL]> { let Latency = 4; } -def M5WriteL5 : SchedWriteRes<[M5UnitL]> { let Latency = 5; } -def M5WriteL6 : SchedWriteRes<[M5UnitL]> { let Latency = 6; } -def M5WriteLA : SchedWriteRes<[M5UnitL, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 1; } -def M5WriteLB : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteLC : SchedWriteRes<[M5UnitAX, - M5UnitL, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteLD : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 4; - let NumMicroOps = 2; } -def M5WriteLE : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 7; - let NumMicroOps = 2; } -def M5WriteLFW : SchedWriteRes<[M5UnitAW, - M5UnitAW, - M5UnitAW, - M5UnitAW, - M5UnitL]> { let Latency = 15; - let NumMicroOps = 6; - let ResourceCycles = [1, 1, 1, 1, 15]; } -def M5WriteLFX : SchedWriteRes<[M5UnitAX, - M5UnitAX, - M5UnitAX, - M5UnitAX, - M5UnitL]> { let Latency = 15; - let NumMicroOps = 6; - let ResourceCycles = [1, 1, 1, 1, 15]; } -def M5WriteLGW : SchedWriteRes<[M5UnitAW, - M5UnitL]> { let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1, 13]; } -def M5WriteLGX : SchedWriteRes<[M5UnitAX, - M5UnitL]> { let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1, 13]; } -def M5WriteLH : SchedWriteRes<[]> { let Latency = 6; - let NumMicroOps = 0; } -def M5WriteLX : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M5WriteLY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteS1 : SchedWriteRes<[M5UnitS]> { let Latency = 1; } -def M5WriteSA : SchedWriteRes<[M5UnitS0]> { let Latency = 4; } -def M5WriteSB : SchedWriteRes<[M5UnitAX, - M5UnitS]> { let Latency = 2; - let NumMicroOps = 1; } -def M5WriteSX : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5ReadAdrBase : SchedReadVariant<[SchedVar< - MCSchedPredicate< - CheckAny< - [ScaledIdxFn, - ExynosScaledIdxFn]>>, [ReadDefault]>, - SchedVar]>; - -def M5WriteNEONB : SchedWriteRes<[M5UnitNALU, - M5UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M5WriteNEONH : SchedWriteRes<[M5UnitNALU, - M5UnitS0]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteNEONI : SchedWriteRes<[M5UnitS0, - M5UnitNSHF]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteNEONK : SchedWriteRes<[M5UnitNSHF, - M5UnitFCVT0, - M5UnitS0]> { let Latency = 5; - let NumMicroOps = 2; } -def M5WriteNEONN : SchedWriteRes<[M5UnitNMSC, - M5UnitNMSC]> { let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [7, 7]; } -def M5WriteNEONO : SchedWriteRes<[M5UnitNMSC, - M5UnitNMSC, - M5UnitNMSC]> { let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [10, 10, 10]; } -def M5WriteNEONP : SchedWriteRes<[M5UnitNSHF, - M5UnitS0, - M5UnitFCVT]> { let Latency = 7; - let NumMicroOps = 2; } -def M5WriteNEONQ : SchedWriteRes<[M5UnitNMSC, - M5UnitC]> { let Latency = 3; - let NumMicroOps = 1; } -def M5WriteNEONU : SchedWriteRes<[M5UnitFSQR, - M5UnitFSQR]> { let Latency = 7; - let ResourceCycles = [4, 4]; } -def M5WriteNEONV : SchedWriteRes<[M5UnitFDIV, - M5UnitFDIV]> { let Latency = 7; - let ResourceCycles = [6, 6]; } -def M5WriteNEONW : SchedWriteRes<[M5UnitFDIV, - M5UnitFDIV]> { let Latency = 12; - let ResourceCycles = [9, 9]; } -def M5WriteNEONX : SchedWriteRes<[M5UnitFSQR, - M5UnitFSQR]> { let Latency = 8; - let ResourceCycles = [5, 5]; } -def M5WriteNEONY : SchedWriteRes<[M5UnitFSQR, - M5UnitFSQR]> { let Latency = 12; - let ResourceCycles = [9, 9]; } -def M5WriteNEONZ : SchedWriteVariant<[SchedVar, - SchedVar]>; - -def M5WriteFADD2 : SchedWriteRes<[M5UnitFADD]> { let Latency = 2; } - -def M5WriteFCVT2 : SchedWriteRes<[M5UnitFCVT]> { let Latency = 2; } -def M5WriteFCVT2A : SchedWriteRes<[M5UnitFCVT0]> { let Latency = 2; } -def M5WriteFCVT3 : SchedWriteRes<[M5UnitFCVT]> { let Latency = 3; } -def M5WriteFCVT3A : SchedWriteRes<[M5UnitFCVT0]> { let Latency = 3; } -def M5WriteFCVTA : SchedWriteRes<[M5UnitFCVT0, - M5UnitS0]> { let Latency = 3; - let NumMicroOps = 1; } -def M5WriteFCVTB : SchedWriteRes<[M5UnitFCVT, - M5UnitS0]> { let Latency = 4; - let NumMicroOps = 1; } -def M5WriteFCVTC : SchedWriteRes<[M5UnitFCVT, - M5UnitS0]> { let Latency = 6; - let NumMicroOps = 1; } - -def M5WriteFDIV5 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 5; - let ResourceCycles = [2]; } -def M5WriteFDIV7 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 7; - let ResourceCycles = [4]; } -def M5WriteFDIV12 : SchedWriteRes<[M5UnitFDIV]> { let Latency = 12; - let ResourceCycles = [9]; } - -def M5WriteFMAC3 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 3; } -def M5WriteFMAC4 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 4; } -def M5WriteFMAC5 : SchedWriteRes<[M5UnitFMAC]> { let Latency = 5; } - -def M5WriteFSQR5 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 5; - let ResourceCycles = [2]; } -def M5WriteFSQR7 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 7; - let ResourceCycles = [4]; } -def M5WriteFSQR8 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 8; - let ResourceCycles = [5]; } -def M5WriteFSQR12 : SchedWriteRes<[M5UnitFSQR]> { let Latency = 12; - let ResourceCycles = [9]; } - -def M5WriteNALU1 : SchedWriteRes<[M5UnitNALU]> { let Latency = 1; } -def M5WriteNALU2 : SchedWriteRes<[M5UnitNALU]> { let Latency = 2; } - -def M5WriteNDOT2 : SchedWriteRes<[M5UnitNDOT]> { let Latency = 2; } - -def M5WriteNCRY2 : SchedWriteRes<[M5UnitNCRY]> { let Latency = 2; } -def M5WriteNCRY1A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 1; } -def M5WriteNCRY2A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 2; } -def M5WriteNCRY3A : SchedWriteRes<[M5UnitNCRY0]> { let Latency = 3; } -def M5WriteNCRY5A : SchedWriteRes<[M5UnitNCRY]> { let Latency = 5; } - -def M5WriteNHAD1 : SchedWriteRes<[M5UnitNHAD]> { let Latency = 1; } -def M5WriteNHAD3 : SchedWriteRes<[M5UnitNHAD]> { let Latency = 3; } - -def M5WriteNMSC1 : SchedWriteRes<[M5UnitNMSC]> { let Latency = 1; } -def M5WriteNMSC2 : SchedWriteRes<[M5UnitNMSC]> { let Latency = 2; } - -def M5WriteNMUL3 : SchedWriteRes<[M5UnitNMUL]> { let Latency = 3; } - -def M5WriteNSHF1 : SchedWriteRes<[M5UnitNSHF]> { let Latency = 1; } -def M5WriteNSHF2 : SchedWriteRes<[M5UnitNSHF]> { let Latency = 2; } -def M5WriteNSHFA : SchedWriteRes<[M5UnitNSHF]> { let Latency = 2; } -def M5WriteNSHFB : SchedWriteRes<[M5UnitNSHF]> { let Latency = 4; - let NumMicroOps = 2; } -def M5WriteNSHFC : SchedWriteRes<[M5UnitNSHF]> { let Latency = 6; - let NumMicroOps = 3; } -def M5WriteNSHFD : SchedWriteRes<[M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 4; } - -def M5WriteNSHT2 : SchedWriteRes<[M5UnitNSHT]> { let Latency = 2; } -def M5WriteNSHT4A : SchedWriteRes<[M5UnitNSHT1]> { let Latency = 4; } - -def M5WriteVLDA : SchedWriteRes<[M5UnitL, - M5UnitL]> { let Latency = 6; - let NumMicroOps = 2; } -def M5WriteVLDB : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 7; - let NumMicroOps = 3; } -def M5WriteVLDC : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 7; - let NumMicroOps = 4; } -def M5WriteVLDD : SchedWriteRes<[M5UnitL, - M5UnitNSHF]> { let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [2, 1]; } -def M5WriteVLDF : SchedWriteRes<[M5UnitL, - M5UnitL]> { let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [6, 5]; } -def M5WriteVLDG : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2, 1, 1]; } -def M5WriteVLDI : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 13; - let NumMicroOps = 3; } -def M5WriteVLDJ : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 4; } -def M5WriteVLDK : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 5; } -def M5WriteVLDL : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitL, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 5; } -def M5WriteVLDM : SchedWriteRes<[M5UnitL, - M5UnitNSHF, - M5UnitNSHF, - M5UnitL, - M5UnitNSHF, - M5UnitNSHF]> { let Latency = 8; - let NumMicroOps = 6; } -def M5WriteVLDN : SchedWriteRes<[M5UnitL, - M5UnitL, - M5UnitL, - M5UnitL]> { let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [2, 2, 2, 2]; } - -def M5WriteVST1 : SchedWriteRes<[M5UnitS, - M5UnitFST]> { let Latency = 1; - let NumMicroOps = 1; } -def M5WriteVSTA : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 2; - let NumMicroOps = 2; } -def M5WriteVSTB : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 3; - let NumMicroOps = 3; } -def M5WriteVSTC : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 4; } -def M5WriteVSTD : SchedWriteRes<[M5UnitS, - M5UnitFST]> { let Latency = 2; } -def M5WriteVSTE : SchedWriteRes<[M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 2; - let NumMicroOps = 1; } -def M5WriteVSTF : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 3; } -def M5WriteVSTG : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 5; } -def M5WriteVSTH : SchedWriteRes<[M5UnitS0, - M5UnitFST]> { let Latency = 1; - let NumMicroOps = 1; } -def M5WriteVSTI : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; } -def M5WriteVSTJ : SchedWriteRes<[M5UnitA, - M5UnitS0, - M5UnitFST]> { let Latency = 1; - let NumMicroOps = 1; } -def M5WriteVSTK : SchedWriteRes<[M5UnitAX, - M5UnitS, - M5UnitFST]> { let Latency = 3; - let NumMicroOps = 2; } -def M5WriteVSTL : SchedWriteRes<[M5UnitNSHF, - M5UnitNSHF, - M5UnitS, - M5UnitFST, - M5UnitS, - M5UnitFST]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1, 1, 2, 1, 2, 1]; } -def M5WriteVSTY : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Special cases. -def M5WriteCOPY : SchedWriteVariant<[SchedVar, - SchedVar]>; -def M5WriteMOVI : SchedWriteVariant<[SchedVar, - SchedVar]>; - -// Fast forwarding. -def M5ReadFM1 : SchedReadAdvance<+1, [M5WriteF2]>; -def M5ReadAESM2 : SchedReadAdvance<+2, [M5WriteNCRY2]>; -def M5ReadFMACM1 : SchedReadAdvance<+1, [M5WriteFMAC4, - M5WriteFMAC5]>; -def M5ReadNMULM1 : SchedReadAdvance<+1, [M5WriteNMUL3]>; - -//===----------------------------------------------------------------------===// -// Coarse scheduling model. - -// Branch instructions. -def : SchedAlias; -def : SchedAlias; - -// Arithmetic and logical integer instructions. -def : SchedAlias; -def : SchedAlias; // FIXME: M5WriteAX crashes TableGen. -def : SchedAlias; // FIXME: M5WriteAX crashes TableGen. -def : SchedAlias; - -// Move instructions. -def : SchedAlias; - -// Divide and multiply instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Miscellaneous instructions. -def : SchedAlias; - -// Addressing modes. -def : SchedAlias; -def : SchedAlias; - -// Load instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Store instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// Atomic load and store instructions. -def : SchedAlias; - -// FP data instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP miscellaneous instructions. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - -// FP load instructions. -def : SchedAlias; - -// FP store instructions. -def : SchedAlias; - -// ASIMD FP instructions. -def : SchedAlias; - -// Other miscellaneous instructions. -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -//===----------------------------------------------------------------------===// -// Generic fast forwarding. - -// TODO: Add FP register forwarding rules. - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -// TODO: The forwarding for 32 bits actually saves 2 cycles. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Finer scheduling model. - -// Branch instructions -def : InstRW<[M5WriteB1], (instrs Bcc)>; -def : InstRW<[M5WriteAFX], (instrs BL)>; -def : InstRW<[M5WriteBX], (instrs BLR)>; -def : InstRW<[M5WriteC1], (instregex "^CBN?Z[WX]")>; -def : InstRW<[M5WriteAD], (instregex "^TBN?ZW")>; -def : InstRW<[M5WriteAB], (instregex "^TBN?ZX")>; - -// Arithmetic and logical integer instructions. -def : InstRW<[M5WriteA1W], (instregex "^(ADC|SBC)S?Wr$")>; -def : InstRW<[M5WriteA1X], (instregex "^(ADC|SBC)S?Xr$")>; -def : InstRW<[M5WriteAXW], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)Wrs$")>; -def : InstRW<[M5WriteAXX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)Xrs$")>; -def : InstRW<[M5WriteAUW], (instrs ORRWrs)>; -def : InstRW<[M5WriteAUX], (instrs ORRXrs)>; -def : InstRW<[M5WriteAXW], (instregex "^(ADD|AND|BIC|SUB)SWrs$")>; -def : InstRW<[M5WriteAXX], (instregex "^(ADD|AND|BIC|SUB)SXrs$")>; -def : InstRW<[M5WriteAXW], (instregex "^(ADD|SUB)S?Wrx(64)?$")>; -def : InstRW<[M5WriteAXX], (instregex "^(ADD|SUB)S?Xrx(64)?$")>; -def : InstRW<[M5WriteAVW], (instrs ADDWri, ORRWri)>; -def : InstRW<[M5WriteAVX], (instrs ADDXri, ORRXri)>; -def : InstRW<[M5WriteA1W], (instregex "^CCM[NP]W[ir]$")>; -def : InstRW<[M5WriteA1X], (instregex "^CCM[NP]X[ir]$")>; -def : InstRW<[M5WriteA1W], (instrs CSELWr, CSINCWr, CSINVWr, CSNEGWr)>; -def : InstRW<[M5WriteA1X], (instrs CSELXr, CSINCXr, CSINVXr, CSNEGXr)>; - -// Move instructions. -def : InstRW<[M5WriteCOPY], (instrs COPY)>; -def : InstRW<[M5WriteZ0], (instrs ADR, ADRP)>; -def : InstRW<[M5WriteZ0], (instregex "^MOV[NZ][WX]i$")>; - -// Shift instructions. -def : InstRW<[M5WriteA1W], (instrs ASRVWr, LSLVWr, LSRVWr, RORVWr)>; -def : InstRW<[M5WriteA1X], (instrs ASRVXr, LSLVXr, LSRVXr, RORVXr)>; - -// Miscellaneous instructions. -def : InstRW<[M5WriteAYW], (instrs EXTRWrri)>; -def : InstRW<[M5WriteAYX], (instrs EXTRXrri)>; -def : InstRW<[M5WriteA1W], (instrs BFMWri, SBFMWri, UBFMWri)>; -def : InstRW<[M5WriteA1X], (instrs BFMXri, SBFMXri, UBFMXri)>; -def : InstRW<[M5WriteA1W], (instrs CLSWr, CLZWr)>; -def : InstRW<[M5WriteA1X], (instrs CLSXr, CLZXr)>; -def : InstRW<[M5WriteA1W], (instrs RBITWr, REVWr, REV16Wr)>; -def : InstRW<[M5WriteA1X], (instrs RBITXr, REVXr, REV16Xr, REV32Xr)>; - -// Load instructions. -def : InstRW<[M5WriteLD, - WriteLDHi, - WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; -def : InstRW<[M5WriteL5, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>; -def : InstRW<[WriteLDIdx, - ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>; -def : InstRW<[M5WriteL5, - ReadAdrBase], (instrs PRFMroW)>; -def : InstRW<[WriteLDIdx, - ReadAdrBase], (instrs PRFMroX)>; - -// Store instructions. -def : InstRW<[M5WriteSB, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>; -def : InstRW<[WriteST, - ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>; - -// Atomic load and store instructions. -def : InstRW<[M5WriteLGW], (instregex "^CAS(A|AL|L)?[BHW]$")>; -def : InstRW<[M5WriteLGX], (instregex "^CAS(A|AL|L)?X$")>; -def : InstRW<[M5WriteLFW], (instregex "^CASP(A|AL|L)?W$")>; -def : InstRW<[M5WriteLFX], (instregex "^CASP(A|AL|L)?X$")>; -def : InstRW<[M5WriteLGW], (instregex "^LD(ADD|CLR|EOR|SET|[SU]MAX|[SU]MIN)(A|AL|L)?[BHW]$")>; -def : InstRW<[M5WriteLGX], (instregex "^LD(ADD|CLR|EOR|SET|[SU]MAX|[SU]MIN)(A|AL|L)?X$")>; -def : InstRW<[M5WriteLGW], (instregex "^SWP(A|AL|L)?[BHW]$")>; -def : InstRW<[M5WriteLGX], (instregex "^SWP(A|AL|L)?X$")>; - -// FP data instructions. -def : InstRW<[M5WriteNSHF1], (instrs FABSHr, FABSSr,FABSDr)>; -def : InstRW<[M5WriteFADD2], (instregex "^F(ADD|SUB)[HSD]rr")>; -def : InstRW<[M5WriteFADD2], (instregex "^FADDPv.i(16|32|64)")>; -def : InstRW<[M5WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>; -def : InstRW<[M5WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>; -def : InstRW<[M5WriteFDIV5], (instrs FDIVHrr)>; -def : InstRW<[M5WriteFDIV7], (instrs FDIVSrr)>; -def : InstRW<[M5WriteFDIV12], (instrs FDIVDrr)>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>; -def : InstRW<[M5WriteFMAC3], (instregex "^FN?MUL[HSD]rr")>; -def : InstRW<[M5WriteFMAC3], (instrs FMULX16, FMULX32, FMULX64)>; -def : InstRW<[M5WriteFMAC4, - M5ReadFMACM1], (instregex "^FN?M(ADD|SUB)[HSD]rrr")>; -def : InstRW<[M5WriteNALU2], (instrs FNEGHr, FNEGSr, FNEGDr)>; -def : InstRW<[M5WriteFCVT3A], (instregex "^FRINT.+r")>; -def : InstRW<[M5WriteNEONH], (instregex "^FCSEL[HSD]rrr")>; -def : InstRW<[M5WriteFSQR5], (instrs FSQRTHr)>; -def : InstRW<[M5WriteFSQR8], (instrs FSQRTSr)>; -def : InstRW<[M5WriteFSQR12], (instrs FSQRTDr)>; - -// FP miscellaneous instructions. -def : InstRW<[M5WriteFCVT2], (instregex "^FCVT[HSD][HSD]r")>; -def : InstRW<[M5WriteFCVTC], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>; -def : InstRW<[M5WriteFCVTB], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; -def : InstRW<[M5WriteNALU1], (instregex "^FMOV[HSD]i")>; -def : InstRW<[M5WriteNALU2], (instregex "^FMOV[HSD]r")>; -def : InstRW<[M5WriteSA], (instregex "^FMOV[WX][HSD]r")>; -def : InstRW<[M5WriteFCVTA], (instregex "^FMOV[HSD][WX]r")>; -def : InstRW<[M5WriteNEONI], (instregex "^FMOVXDHighr")>; -def : InstRW<[M5WriteNEONK], (instregex "^FMOVDXHighr")>; -def : InstRW<[M5WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1(f16|i32|i64)")>; -def : InstRW<[M5WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M5WriteFMAC4], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; - -// FP load instructions. -def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>; -def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>; -def : InstRW<[WriteVLD, - WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>; -def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>; -def : InstRW<[M5WriteLE, - ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>; -def : InstRW<[WriteVLD, - ReadAdrBase], (instregex "^LDR[BHSD]roX")>; -def : InstRW<[M5WriteLY, - ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[WriteVLD, - M5WriteLH], (instregex "^LDN?P[SD]i")>; -def : InstRW<[M5WriteLA, - M5WriteLH], (instregex "^LDN?PQi")>; -def : InstRW<[M5WriteLB, - M5WriteLH, - WriteAdr], (instregex "^LDP[SD](post|pre)")>; -def : InstRW<[M5WriteLC, - M5WriteLH, - WriteAdr], (instregex "^LDPQ(post|pre)")>; - -// FP store instructions. -def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>; -def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>; -def : InstRW<[WriteVST, - ReadAdrBase], (instregex "^STR[BHSD]ro[WX]")>; -def : InstRW<[M5WriteVSTK, - ReadAdrBase], (instregex "^STRQroW")>; -def : InstRW<[M5WriteVSTY, - ReadAdrBase], (instregex "^STRQroX")>; -def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>; -def : InstRW<[M5WriteVSTH], (instregex "^STN?PQi")>; -def : InstRW<[WriteVST, - WriteAdr], (instregex "^STP[SD](post|pre)")>; -def : InstRW<[M5WriteVSTJ, - WriteAdr], (instregex "^STPQ(post|pre)")>; - -// ASIMD instructions. -def : InstRW<[M5WriteNHAD1], (instregex "^[SU]ABDL?v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]ABAL?v")>; -def : InstRW<[M5WriteNMSC1], (instregex "^ABSv")>; -def : InstRW<[M5WriteNALU2], (instregex "^(ADD|NEG|SUB)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^(SU|US)QADDv")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]RHADDv")>; -def : InstRW<[M5WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>; -def : InstRW<[M5WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; -def : InstRW<[M5WriteNALU2], (instregex "^CMTSTv")>; -def : InstRW<[M5WriteNALU2], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>; -def : InstRW<[M5WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; -def : InstRW<[M5WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M5WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^ML[AS]v")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^SQRDML[AS]H")>; -def : InstRW<[M5WriteNMUL3], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>; -def : InstRW<[M5WriteNMUL3, - M5ReadNMULM1], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>; -def : InstRW<[M5WriteNDOT2], (instregex "^[SU]DOT(lane)?v")>; -def : InstRW<[M5WriteNHAD3], (instregex "^[SU]ADALPv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^SHL[dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^S[LR]I[dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^[SU]SH[LR][dv]")>; -def : InstRW<[M5WriteNSHT2], (instregex "^[SU]?SHLLv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>; - -// ASIMD FP instructions. -def : InstRW<[M5WriteNSHF2], (instregex "^FABSv.f(16|32|64)")>; -def : InstRW<[M5WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(16|32|64)")>; -def : InstRW<[M5WriteFADD2], (instregex "^FADDPv.f(16|32|64)")>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; -def : InstRW<[M5WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>; -def : InstRW<[M5WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>; -def : InstRW<[M5WriteFCVT2], (instregex "^[SU]CVTFv.[fi](16|32|64)")>; -def : InstRW<[M5WriteFDIV7], (instrs FDIVv4f16)>; -def : InstRW<[M5WriteNEONV], (instrs FDIVv8f16)>; -def : InstRW<[M5WriteFDIV7], (instrs FDIVv2f32)>; -def : InstRW<[M5WriteNEONV], (instrs FDIVv4f32)>; -def : InstRW<[M5WriteNEONW], (instrs FDIVv2f64)>; -def : InstRW<[M5WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; -def : InstRW<[M5WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; -def : InstRW<[M5WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; -def : InstRW<[M5WriteFMAC3], (instregex "^FMULX?v.[fi](16|32|64)")>; -def : InstRW<[M5WriteFMAC4, - M5ReadFMACM1], (instregex "^FML[AS]v.[fi](16|32|64)")>; -def : InstRW<[M5WriteNALU2], (instregex "^FNEGv.f(16|32|64)")>; -def : InstRW<[M5WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; -def : InstRW<[M5WriteFSQR7], (instrs FSQRTv4f16)>; -def : InstRW<[M5WriteNEONU], (instrs FSQRTv8f16)>; -def : InstRW<[M5WriteFSQR8], (instrs FSQRTv2f32)>; -def : InstRW<[M5WriteNEONX], (instrs FSQRTv4f32)>; -def : InstRW<[M5WriteNEONY], (instrs FSQRTv2f64)>; - -// ASIMD miscellaneous instructions. -def : InstRW<[M5WriteNALU2], (instregex "^RBITv")>; -def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL)v")>; -def : InstRW<[M5WriteNALU2], (instregex "^CL[STZ]v")>; -def : InstRW<[M5WriteNEONB], (instregex "^DUPv.+gpr")>; -def : InstRW<[M5WriteNSHF2], (instregex "^CPY")>; -def : InstRW<[M5WriteNSHF2], (instregex "^DUPv.+lane")>; -def : InstRW<[M5WriteNSHF2], (instregex "^EXTv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^XTNv")>; -def : InstRW<[M5WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>; -def : InstRW<[M5WriteNEONB], (instregex "^INSv.+gpr")>; -def : InstRW<[M5WriteNSHF2], (instregex "^INSv.+lane")>; -def : InstRW<[M5WriteMOVI], (instregex "^(MOV|MVN)I")>; -def : InstRW<[M5WriteNALU1], (instregex "^FMOVv.f(16|32|64)")>; -def : InstRW<[M5WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(16|32|64)")>; -def : InstRW<[M5WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>; -def : InstRW<[M5WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(16|32|64)")>; -def : InstRW<[M5WriteNSHF2], (instregex "^REV(16|32|64)v")>; -def : InstRW<[M5WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>; -def : InstRW<[M5WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>; -def : InstRW<[M5WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>; -def : InstRW<[M5WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>; -def : InstRW<[M5WriteNEONP], (instregex "^[SU]MOVv")>; -def : InstRW<[M5WriteNSHF2], (instregex "^(TRN|UZP|ZIP)[12]v")>; - -// ASIMD load instructions. -def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[WriteVLD, - M5WriteA1X, - WriteAdr], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDA], (instregex "LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDA, - M5WriteA1X, - WriteAdr], (instregex "LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDB], (instregex "LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDB, - M5WriteA1X, - WriteAdr], (instregex "LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDC], (instregex "LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDC, - M5WriteA1X, - WriteAdr], (instregex "LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDD], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[M5WriteVLDD, - M5WriteA1X, - WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[WriteVLD, - M5WriteA1X, - WriteAdr], (instregex "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDF], (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$")>; -def : InstRW<[M5WriteVLDF, - M5WriteA1X, - WriteAdr], (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$")>; -def : InstRW<[M5WriteVLDG], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[M5WriteVLDG, - M5WriteA1X, - WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVLDA], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDA, - M5WriteA1X, - WriteAdr], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDI], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>; -def : InstRW<[M5WriteVLDI, - M5WriteA1X, - WriteAdr], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>; -def : InstRW<[M5WriteVLDJ], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[M5WriteVLDJ, - M5WriteA1X, - WriteAdr], (instregex "LD3i(8|16|32)_POST$")>; -def : InstRW<[M5WriteVLDL], (instregex "LD3i64$")>; -def : InstRW<[M5WriteVLDL, - M5WriteA1X, - WriteAdr], (instregex "LD3i64_POST$")>; -def : InstRW<[M5WriteVLDB], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDB, - M5WriteA1X], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVLDN], (instregex "LD4Fourv(8b|16b|4h|8h|2s|4s|2d)$")>; -def : InstRW<[M5WriteVLDN, - M5WriteA1X, - WriteAdr], (instregex "LD4Fourv(8b|16b|4h|8h|2s|4s|2d)_POST$")>; -def : InstRW<[M5WriteVLDK], (instregex "LD4i(8|16|32)$")>; -def : InstRW<[M5WriteVLDK, - M5WriteA1X, - WriteAdr], (instregex "LD4i(8|16|32)_POST$")>; -def : InstRW<[M5WriteVLDM], (instregex "LD4i64$")>; -def : InstRW<[M5WriteVLDM, - M5WriteA1X, - WriteAdr], (instregex "LD4i64_POST$")>; -def : InstRW<[M5WriteVLDC], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVLDC, - M5WriteA1X, - WriteAdr], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; - -// ASIMD store instructions. -def : InstRW<[WriteVST], (instregex "ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[WriteVST, - M5WriteA1X, - WriteAdr], (instregex "ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVSTA], (instregex "ST1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVSTA, - M5WriteA1X, - WriteAdr], (instregex "ST1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; - -def : InstRW<[M5WriteVSTB], (instregex "ST1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVSTB, - M5WriteA1X, - WriteAdr], (instregex "ST1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[M5WriteVSTC], (instregex "ST1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[M5WriteVSTC, - M5WriteA1X, - WriteAdr], (instregex "ST1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; -def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[WriteVST, - M5WriteA1X, - WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[M5WriteVSTD, - M5WriteA1X, - WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[M5WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[M5WriteVSTE, - M5WriteA1X, - WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[M5WriteVSTD], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[M5WriteVSTD, - M5WriteA1X, - WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[M5WriteVSTF, - M5WriteA1X, - WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; -def : InstRW<[M5WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[M5WriteVSTG, - M5WriteA1X, - WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[M5WriteVSTA], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[M5WriteVSTA, - M5WriteA1X, - WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[M5WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[M5WriteVSTL, - M5WriteA1X, - WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; -def : InstRW<[M5WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[M5WriteVSTI, - M5WriteA1X, - WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[M5WriteVSTA], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[M5WriteVSTA, - M5WriteA1X, - WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; - -// Cryptography instructions. -def : InstRW<[M5WriteNCRY2], (instregex "^AES[DE]")>; -def : InstRW<[M5WriteNCRY2, - M5ReadAESM2], (instregex "^AESI?MC")>; -def : InstRW<[M5WriteNCRY2A], (instregex "^PMULv")>; -def : InstRW<[M5WriteNCRY1A], (instregex "^PMULLv(1|8)i")>; -def : InstRW<[M5WriteNCRY3A], (instregex "^PMULLv(2|16)i")>; -def : InstRW<[M5WriteNCRY2A], (instregex "^SHA1(H|SU[01])")>; -def : InstRW<[M5WriteNCRY5A], (instregex "^SHA1[CMP]")>; -def : InstRW<[M5WriteNCRY2A], (instrs SHA256SU0rr)>; -def : InstRW<[M5WriteNCRY5A], (instrs SHA256SU1rrr)>; -def : InstRW<[M5WriteNCRY5A], (instregex "^SHA256H2?")>; - -// CRC instructions. -def : InstRW<[M5WriteF2, - M5ReadFM1], (instregex "^CRC32C?[BHWX]")>; - -} // SchedModel = ExynosM5Model diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s deleted file mode 100644 index e981b2a..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/aes.s +++ /dev/null @@ -1,57 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -aese v0.16b, v1.16b -aesmc v0.16b, v0.16b - -aesd v0.16b, v1.16b -aesimc v0.16b, v0.16b - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 400 - -# M3-NEXT: Total Cycles: 203 -# M4-NEXT: Total Cycles: 203 -# M5-NEXT: Total Cycles: 403 - -# ALL-NEXT: Total uOps: 400 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 1.97 -# M3-NEXT: IPC: 1.97 - -# M4-NEXT: uOps Per Cycle: 1.97 -# M4-NEXT: IPC: 1.97 - -# M5-NEXT: uOps Per Cycle: 0.99 -# M5-NEXT: IPC: 0.99 - -# ALL-NEXT: Block RThroughput: 2.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 1 0.50 aese v0.16b, v1.16b -# M3-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b -# M3-NEXT: 1 1 0.50 aesd v0.16b, v1.16b -# M3-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b - -# M4-NEXT: 1 1 0.50 aese v0.16b, v1.16b -# M4-NEXT: 1 1 0.50 aesmc v0.16b, v0.16b -# M4-NEXT: 1 1 0.50 aesd v0.16b, v1.16b -# M4-NEXT: 1 1 0.50 aesimc v0.16b, v0.16b - -# M5-NEXT: 1 2 0.50 aese v0.16b, v1.16b -# M5-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b -# M5-NEXT: 1 2 0.50 aesd v0.16b, v1.16b -# M5-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s deleted file mode 100644 index f23b1f7..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld1.s +++ /dev/null @@ -1,189 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -ld1 {v0.s}[0], [sp] -ld1r {v0.2s}, [sp] -ld1 {v0.2s}, [sp] -ld1 {v0.2s, v1.2s}, [sp] -ld1 {v0.2s, v1.2s, v2.2s}, [sp] -ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] - -ld1 {v0.d}[0], [sp] -ld1r {v0.2d}, [sp] -ld1 {v0.2d}, [sp] -ld1 {v0.2d, v1.2d}, [sp] -ld1 {v0.2d, v1.2d, v2.2d}, [sp] -ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] - -ld1 {v0.s}[0], [sp], #4 -ld1r {v0.2s}, [sp], #4 -ld1 {v0.2s}, [sp], #8 -ld1 {v0.2s, v1.2s}, [sp], #16 -ld1 {v0.2s, v1.2s, v2.2s}, [sp], #24 -ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 - -ld1 {v0.d}[0], [sp], #8 -ld1r {v0.2d}, [sp], #8 -ld1 {v0.2d}, [sp], #16 -ld1 {v0.2d, v1.2d}, [sp], #32 -ld1 {v0.2d, v1.2d, v2.2d}, [sp], #48 -ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 - -ld1 {v0.s}[0], [sp], x0 -ld1r {v0.2s}, [sp], x0 -ld1 {v0.2s}, [sp], x0 -ld1 {v0.2s, v1.2s}, [sp], x0 -ld1 {v0.2s, v1.2s, v2.2s}, [sp], x0 -ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 - -ld1 {v0.d}[0], [sp], x0 -ld1r {v0.2d}, [sp], x0 -ld1 {v0.2d}, [sp], x0 -ld1 {v0.2d, v1.2d}, [sp], x0 -ld1 {v0.2d, v1.2d, v2.2d}, [sp], x0 -ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 3600 - -# M3-NEXT: Total Cycles: 14903 -# M4-NEXT: Total Cycles: 14703 -# M5-NEXT: Total Cycles: 17203 - -# ALL-NEXT: Total uOps: 10200 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.68 -# M3-NEXT: IPC: 0.24 - -# M4-NEXT: uOps Per Cycle: 0.69 -# M4-NEXT: IPC: 0.24 - -# M5-NEXT: uOps Per Cycle: 0.59 -# M5-NEXT: IPC: 0.21 - -# ALL-NEXT: Block RThroughput: 39.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp] -# M3-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp] -# M3-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp] -# M3-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp] -# M3-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp] -# M3-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M3-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp] -# M3-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp] -# M3-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp] -# M3-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp] -# M3-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp] -# M3-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4 -# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4 -# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8 -# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16 -# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8 -# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8 -# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16 -# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32 -# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M3-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0 -# M3-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0 -# M3-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0 -# M3-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0 -# M3-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M3-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M3-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0 -# M3-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0 -# M3-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0 -# M3-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0 -# M3-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M3-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M4-NEXT: 2 6 1.00 * ld1 { v0.s }[0], [sp] -# M4-NEXT: 1 5 0.50 * ld1r { v0.2s }, [sp] -# M4-NEXT: 1 5 0.50 * ld1 { v0.2s }, [sp] -# M4-NEXT: 2 5 1.00 * ld1 { v0.2s, v1.2s }, [sp] -# M4-NEXT: 3 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp] -# M4-NEXT: 4 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M4-NEXT: 2 6 1.00 * ld1 { v0.d }[0], [sp] -# M4-NEXT: 1 5 0.50 * ld1r { v0.2d }, [sp] -# M4-NEXT: 1 5 0.50 * ld1 { v0.2d }, [sp] -# M4-NEXT: 2 5 1.00 * ld1 { v0.2d, v1.2d }, [sp] -# M4-NEXT: 3 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp] -# M4-NEXT: 4 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], #4 -# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], #4 -# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], #8 -# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16 -# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], #8 -# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], #8 -# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], #16 -# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32 -# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M4-NEXT: 3 6 1.00 * ld1 { v0.s }[0], [sp], x0 -# M4-NEXT: 2 5 0.50 * ld1r { v0.2s }, [sp], x0 -# M4-NEXT: 2 5 0.50 * ld1 { v0.2s }, [sp], x0 -# M4-NEXT: 3 5 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0 -# M4-NEXT: 4 6 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M4-NEXT: 5 6 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M4-NEXT: 3 6 1.00 * ld1 { v0.d }[0], [sp], x0 -# M4-NEXT: 2 5 0.50 * ld1r { v0.2d }, [sp], x0 -# M4-NEXT: 2 5 0.50 * ld1 { v0.2d }, [sp], x0 -# M4-NEXT: 3 5 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0 -# M4-NEXT: 4 6 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M4-NEXT: 5 6 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M5-NEXT: 2 7 1.00 * ld1 { v0.s }[0], [sp] -# M5-NEXT: 1 6 0.50 * ld1r { v0.2s }, [sp] -# M5-NEXT: 1 6 0.50 * ld1 { v0.2s }, [sp] -# M5-NEXT: 2 6 1.00 * ld1 { v0.2s, v1.2s }, [sp] -# M5-NEXT: 3 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp] -# M5-NEXT: 4 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M5-NEXT: 2 7 1.00 * ld1 { v0.d }[0], [sp] -# M5-NEXT: 1 6 0.50 * ld1r { v0.2d }, [sp] -# M5-NEXT: 1 6 0.50 * ld1 { v0.2d }, [sp] -# M5-NEXT: 2 6 1.00 * ld1 { v0.2d, v1.2d }, [sp] -# M5-NEXT: 3 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp] -# M5-NEXT: 4 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], #4 -# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], #4 -# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], #8 -# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], #16 -# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], #8 -# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], #8 -# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], #16 -# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], #32 -# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M5-NEXT: 3 7 1.00 * ld1 { v0.s }[0], [sp], x0 -# M5-NEXT: 2 6 0.50 * ld1r { v0.2s }, [sp], x0 -# M5-NEXT: 2 6 0.50 * ld1 { v0.2s }, [sp], x0 -# M5-NEXT: 3 6 1.00 * ld1 { v0.2s, v1.2s }, [sp], x0 -# M5-NEXT: 4 7 1.50 * ld1 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M5-NEXT: 5 7 2.00 * ld1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M5-NEXT: 3 7 1.00 * ld1 { v0.d }[0], [sp], x0 -# M5-NEXT: 2 6 0.50 * ld1r { v0.2d }, [sp], x0 -# M5-NEXT: 2 6 0.50 * ld1 { v0.2d }, [sp], x0 -# M5-NEXT: 3 6 1.00 * ld1 { v0.2d, v1.2d }, [sp], x0 -# M5-NEXT: 4 7 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M5-NEXT: 5 7 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s deleted file mode 100644 index 2ca640d..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld2.s +++ /dev/null @@ -1,118 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -ld2 {v0.s, v1.s}[0], [sp] -ld2r {v0.2s, v1.2s}, [sp] -ld2 {v0.2s, v1.2s}, [sp] - -ld2 {v0.d, v1.d}[0], [sp] -ld2r {v0.2d, v1.2d}, [sp] -ld2 {v0.2d, v1.2d}, [sp] - -ld2 {v0.s, v1.s}[0], [sp], #8 -ld2r {v0.2s, v1.2s}, [sp], #8 -ld2 {v0.2s, v1.2s}, [sp], #16 - -ld2 {v0.d, v1.d}[0], [sp], #16 -ld2r {v0.2d, v1.2d}, [sp], #16 -ld2 {v0.2d, v1.2d}, [sp], #32 - -ld2 {v0.s, v1.s}[0], [sp], x0 -ld2r {v0.2s, v1.2s}, [sp], x0 -ld2 {v0.2s, v1.2s}, [sp], x0 - -ld2 {v0.d, v1.d}[0], [sp], x0 -ld2r {v0.2d, v1.2d}, [sp], x0 -ld2 {v0.2d, v1.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1800 - -# M3-NEXT: Total Cycles: 10003 -# M4-NEXT: Total Cycles: 9803 -# M5-NEXT: Total Cycles: 11103 - -# ALL-NEXT: Total uOps: 5400 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.54 -# M3-NEXT: IPC: 0.18 -# M3-NEXT: Block RThroughput: 42.0 - -# M4-NEXT: uOps Per Cycle: 0.55 -# M4-NEXT: IPC: 0.18 -# M4-NEXT: Block RThroughput: 30.0 - -# M5-NEXT: uOps Per Cycle: 0.49 -# M5-NEXT: IPC: 0.16 -# M5-NEXT: Block RThroughput: 45.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp] -# M3-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp] -# M3-NEXT: 2 10 5.00 * ld2 { v0.2s, v1.2s }, [sp] -# M3-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp] -# M3-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp] -# M3-NEXT: 2 10 5.00 * ld2 { v0.2d, v1.2d }, [sp] -# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8 -# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8 -# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], #16 -# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16 -# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16 -# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], #32 -# M3-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0 -# M3-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0 -# M3-NEXT: 3 10 5.00 * ld2 { v0.2s, v1.2s }, [sp], x0 -# M3-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0 -# M3-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0 -# M3-NEXT: 3 10 5.00 * ld2 { v0.2d, v1.2d }, [sp], x0 - -# M4-NEXT: 3 6 1.00 * ld2 { v0.s, v1.s }[0], [sp] -# M4-NEXT: 2 5 1.00 * ld2r { v0.2s, v1.2s }, [sp] -# M4-NEXT: 2 10 3.00 * ld2 { v0.2s, v1.2s }, [sp] -# M4-NEXT: 3 6 1.00 * ld2 { v0.d, v1.d }[0], [sp] -# M4-NEXT: 2 5 1.00 * ld2r { v0.2d, v1.2d }, [sp] -# M4-NEXT: 2 10 3.00 * ld2 { v0.2d, v1.2d }, [sp] -# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8 -# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8 -# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], #16 -# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16 -# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16 -# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], #32 -# M4-NEXT: 4 6 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0 -# M4-NEXT: 3 5 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0 -# M4-NEXT: 3 10 3.00 * ld2 { v0.2s, v1.2s }, [sp], x0 -# M4-NEXT: 4 6 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0 -# M4-NEXT: 3 5 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0 -# M4-NEXT: 3 10 3.00 * ld2 { v0.2d, v1.2d }, [sp], x0 - -# M5-NEXT: 3 7 1.00 * ld2 { v0.s, v1.s }[0], [sp] -# M5-NEXT: 2 6 1.00 * ld2r { v0.2s, v1.2s }, [sp] -# M5-NEXT: 2 11 5.50 * ld2 { v0.2s, v1.2s }, [sp] -# M5-NEXT: 3 7 1.00 * ld2 { v0.d, v1.d }[0], [sp] -# M5-NEXT: 2 6 1.00 * ld2r { v0.2d, v1.2d }, [sp] -# M5-NEXT: 2 11 5.50 * ld2 { v0.2d, v1.2d }, [sp] -# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], #8 -# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], #8 -# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], #16 -# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], #16 -# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], #16 -# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], #32 -# M5-NEXT: 4 7 1.00 * ld2 { v0.s, v1.s }[0], [sp], x0 -# M5-NEXT: 3 6 1.00 * ld2r { v0.2s, v1.2s }, [sp], x0 -# M5-NEXT: 3 11 5.50 * ld2 { v0.2s, v1.2s }, [sp], x0 -# M5-NEXT: 4 7 1.00 * ld2 { v0.d, v1.d }[0], [sp], x0 -# M5-NEXT: 3 6 1.00 * ld2r { v0.2d, v1.2d }, [sp], x0 -# M5-NEXT: 3 11 5.50 * ld2 { v0.2d, v1.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s deleted file mode 100644 index a6a8943..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld3.s +++ /dev/null @@ -1,118 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -ld3 {v0.s, v1.s, v2.s}[0], [sp] -ld3r {v0.2s, v1.2s, v2.2s}, [sp] -ld3 {v0.2s, v1.2s, v2.2s}, [sp] - -ld3 {v0.d, v1.d, v2.d}[0], [sp] -ld3r {v0.2d, v1.2d, v2.2d}, [sp] -ld3 {v0.2d, v1.2d, v2.2d}, [sp] - -ld3 {v0.s, v1.s, v2.s}[0], [sp], #12 -ld3r {v0.2s, v1.2s, v2.2s}, [sp], #12 -ld3 {v0.2s, v1.2s, v2.2s}, [sp], #24 - -ld3 {v0.d, v1.d, v2.d}[0], [sp], #24 -ld3r {v0.2d, v1.2d, v2.2d}, [sp], #24 -ld3 {v0.2d, v1.2d, v2.2d}, [sp], #48 - -ld3 {v0.s, v1.s, v2.s}[0], [sp], x0 -ld3r {v0.2s, v1.2s, v2.2s}, [sp], x0 -ld3 {v0.2s, v1.2s, v2.2s}, [sp], x0 - -ld3 {v0.d, v1.d, v2.d}[0], [sp], x0 -ld3r {v0.2d, v1.2d, v2.2d}, [sp], x0 -ld3 {v0.2d, v1.2d, v2.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1800 - -# M3-NEXT: Total Cycles: 12501 -# M4-NEXT: Total Cycles: 11804 -# M5-NEXT: Total Cycles: 12903 - -# ALL-NEXT: Total uOps: 7500 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.60 -# M3-NEXT: IPC: 0.14 -# M3-NEXT: Block RThroughput: 84.0 - -# M4-NEXT: uOps Per Cycle: 0.64 -# M4-NEXT: IPC: 0.15 -# M4-NEXT: Block RThroughput: 54.0 - -# M5-NEXT: uOps Per Cycle: 0.58 -# M5-NEXT: IPC: 0.14 -# M5-NEXT: Block RThroughput: 22.5 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 4 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp] -# M3-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp] -# M3-NEXT: 3 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp] -# M3-NEXT: 5 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp] -# M3-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp] -# M3-NEXT: 3 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp] -# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12 -# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12 -# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24 -# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24 -# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M3-NEXT: 5 7 1.00 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0 -# M3-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M3-NEXT: 4 12 9.00 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M3-NEXT: 6 6 6.00 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0 -# M3-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M3-NEXT: 4 12 9.00 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0 - -# M4-NEXT: 4 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp] -# M4-NEXT: 3 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp] -# M4-NEXT: 3 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp] -# M4-NEXT: 5 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp] -# M4-NEXT: 3 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp] -# M4-NEXT: 3 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp] -# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12 -# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12 -# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24 -# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24 -# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M4-NEXT: 5 7 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0 -# M4-NEXT: 4 6 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M4-NEXT: 4 12 4.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M4-NEXT: 6 7 4.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0 -# M4-NEXT: 4 6 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M4-NEXT: 4 12 4.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0 - -# M5-NEXT: 4 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp] -# M5-NEXT: 3 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp] -# M5-NEXT: 3 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp] -# M5-NEXT: 5 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp] -# M5-NEXT: 3 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp] -# M5-NEXT: 3 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp] -# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], #12 -# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], #12 -# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], #24 -# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], #24 -# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M5-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[0], [sp], x0 -# M5-NEXT: 4 7 1.50 * ld3r { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M5-NEXT: 4 13 1.50 * ld3 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M5-NEXT: 6 8 1.50 * ld3 { v0.d, v1.d, v2.d }[0], [sp], x0 -# M5-NEXT: 4 7 1.50 * ld3r { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M5-NEXT: 4 13 1.50 * ld3 { v0.2d, v1.2d, v2.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s deleted file mode 100644 index c5f2c9b..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-ld4.s +++ /dev/null @@ -1,118 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp] -ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] -ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] - -ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp] -ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] -ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] - -ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16 -ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #16 -ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 - -ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32 -ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #32 -ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 - -ld4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0 -ld4r {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 -ld4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 - -ld4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0 -ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 -ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1800 - -# M3-NEXT: Total Cycles: 15598 -# M4-NEXT: Total Cycles: 13004 -# M5-NEXT: Total Cycles: 14304 - -# ALL-NEXT: Total uOps: 9300 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.60 -# M3-NEXT: IPC: 0.12 -# M3-NEXT: Block RThroughput: 108.0 - -# M4-NEXT: uOps Per Cycle: 0.72 -# M4-NEXT: IPC: 0.14 -# M4-NEXT: Block RThroughput: 61.5 - -# M5-NEXT: uOps Per Cycle: 0.65 -# M5-NEXT: IPC: 0.13 -# M5-NEXT: Block RThroughput: 40.5 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 5 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] -# M3-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M3-NEXT: 4 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M3-NEXT: 6 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] -# M3-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M3-NEXT: 4 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 -# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16 -# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 -# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32 -# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M3-NEXT: 6 9 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 -# M3-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M3-NEXT: 5 14 12.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M3-NEXT: 7 7 6.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 -# M3-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 -# M3-NEXT: 5 14 12.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M4-NEXT: 5 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] -# M4-NEXT: 4 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M4-NEXT: 4 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M4-NEXT: 6 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] -# M4-NEXT: 4 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M4-NEXT: 4 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 -# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16 -# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 -# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32 -# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M4-NEXT: 6 7 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 -# M4-NEXT: 5 6 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M4-NEXT: 5 14 6.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M4-NEXT: 7 7 3.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 -# M4-NEXT: 5 6 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 -# M4-NEXT: 5 14 6.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M5-NEXT: 5 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] -# M5-NEXT: 4 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M5-NEXT: 4 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M5-NEXT: 6 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] -# M5-NEXT: 4 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M5-NEXT: 4 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 -# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #16 -# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 -# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #32 -# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M5-NEXT: 6 8 2.00 * ld4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 -# M5-NEXT: 5 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M5-NEXT: 5 15 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M5-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 -# M5-NEXT: 5 7 2.00 * ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 -# M5-NEXT: 5 15 4.00 * ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s deleted file mode 100644 index 81e5fe8..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s +++ /dev/null @@ -1,169 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -st1 {v0.s}[0], [sp] -st1 {v0.2s}, [sp] -st1 {v0.2s, v1.2s}, [sp] -st1 {v0.2s, v1.2s, v2.2s}, [sp] -st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] - -st1 {v0.d}[0], [sp] -st1 {v0.2d}, [sp] -st1 {v0.2d, v1.2d}, [sp] -st1 {v0.2d, v1.2d, v2.2d}, [sp] -st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] - -st1 {v0.s}[0], [sp], #4 -st1 {v0.2s}, [sp], #8 -st1 {v0.2s, v1.2s}, [sp], #16 -st1 {v0.2s, v1.2s, v2.2s}, [sp], #24 -st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 - -st1 {v0.d}[0], [sp], #8 -st1 {v0.2d}, [sp], #16 -st1 {v0.2d, v1.2d}, [sp], #32 -st1 {v0.2d, v1.2d, v2.2d}, [sp], #48 -st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 - -st1 {v0.s}[0], [sp], x0 -st1 {v0.2s}, [sp], x0 -st1 {v0.2s, v1.2s}, [sp], x0 -st1 {v0.2s, v1.2s, v2.2s}, [sp], x0 -st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 - -st1 {v0.d}[0], [sp], x0 -st1 {v0.2d}, [sp], x0 -st1 {v0.2d, v1.2d}, [sp], x0 -st1 {v0.2d, v1.2d, v2.2d}, [sp], x0 -st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 3000 - -# M3-NEXT: Total Cycles: 10203 -# M3-NEXT: Total uOps: 8400 - -# M4-NEXT: Total Cycles: 6603 -# M4-NEXT: Total uOps: 8600 - -# M5-NEXT: Total Cycles: 6603 -# M5-NEXT: Total uOps: 8600 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.82 -# M3-NEXT: IPC: 0.29 -# M3-NEXT: Block RThroughput: 72.0 - -# M4-NEXT: uOps Per Cycle: 1.30 -# M4-NEXT: IPC: 0.45 -# M4-NEXT: Block RThroughput: 33.0 - -# M5-NEXT: uOps Per Cycle: 1.30 -# M5-NEXT: IPC: 0.45 -# M5-NEXT: Block RThroughput: 33.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp] -# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp] -# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp] -# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp] -# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp] -# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp] -# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp] -# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp] -# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], #4 -# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], #8 -# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], #16 -# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], #8 -# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], #16 -# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], #32 -# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M3-NEXT: 4 7 3.00 * st1 { v0.s }[0], [sp], x0 -# M3-NEXT: 1 1 1.00 * st1 { v0.2s }, [sp], x0 -# M3-NEXT: 2 2 2.00 * st1 { v0.2s, v1.2s }, [sp], x0 -# M3-NEXT: 3 3 3.00 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M3-NEXT: 4 4 4.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M3-NEXT: 4 7 3.00 * st1 { v0.d }[0], [sp], x0 -# M3-NEXT: 1 1 1.00 * st1 { v0.2d }, [sp], x0 -# M3-NEXT: 2 2 2.00 * st1 { v0.2d, v1.2d }, [sp], x0 -# M3-NEXT: 3 3 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M3-NEXT: 4 4 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M4-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp] -# M4-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp] -# M4-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp] -# M4-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp] -# M4-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M4-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp] -# M4-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp] -# M4-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp] -# M4-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp] -# M4-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4 -# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8 -# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16 -# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8 -# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16 -# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32 -# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M4-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0 -# M4-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0 -# M4-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0 -# M4-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M4-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M4-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0 -# M4-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0 -# M4-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0 -# M4-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M4-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M5-NEXT: 1 1 0.50 * st1 { v0.s }[0], [sp] -# M5-NEXT: 1 1 0.50 * st1 { v0.2s }, [sp] -# M5-NEXT: 2 2 1.00 * st1 { v0.2s, v1.2s }, [sp] -# M5-NEXT: 3 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp] -# M5-NEXT: 4 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M5-NEXT: 1 1 0.50 * st1 { v0.d }[0], [sp] -# M5-NEXT: 1 1 0.50 * st1 { v0.2d }, [sp] -# M5-NEXT: 2 2 1.00 * st1 { v0.2d, v1.2d }, [sp] -# M5-NEXT: 3 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp] -# M5-NEXT: 4 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], #4 -# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], #8 -# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], #16 -# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], #8 -# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], #16 -# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], #32 -# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M5-NEXT: 2 1 0.50 * st1 { v0.s }[0], [sp], x0 -# M5-NEXT: 2 1 0.50 * st1 { v0.2s }, [sp], x0 -# M5-NEXT: 3 2 1.00 * st1 { v0.2s, v1.2s }, [sp], x0 -# M5-NEXT: 4 3 1.50 * st1 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M5-NEXT: 5 4 2.00 * st1 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M5-NEXT: 2 1 0.50 * st1 { v0.d }[0], [sp], x0 -# M5-NEXT: 2 1 0.50 * st1 { v0.2d }, [sp], x0 -# M5-NEXT: 3 2 1.00 * st1 { v0.2d, v1.2d }, [sp], x0 -# M5-NEXT: 4 3 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [sp], x0 -# M5-NEXT: 5 4 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s deleted file mode 100644 index 9506241..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s +++ /dev/null @@ -1,97 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -st2 {v0.s, v1.s}[0], [sp] -st2 {v0.2s, v1.2s}, [sp] - -st2 {v0.d, v1.d}[0], [sp] -st2 {v0.2d, v1.2d}, [sp] - -st2 {v0.s, v1.s}[0], [sp], #8 -st2 {v0.2s, v1.2s}, [sp], #16 - -st2 {v0.d, v1.d}[0], [sp], #16 -st2 {v0.2d, v1.2d}, [sp], #32 - -st2 {v0.s, v1.s}[0], [sp], x0 -st2 {v0.2s, v1.2s}, [sp], x0 - -st2 {v0.d, v1.d}[0], [sp], x0 -st2 {v0.2d, v1.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1200 - -# M3-NEXT: Total Cycles: 8703 -# M3-NEXT: Total uOps: 5400 - -# M4-NEXT: Total Cycles: 2403 -# M4-NEXT: Total uOps: 2300 - -# M5-NEXT: Total Cycles: 2403 -# M5-NEXT: Total uOps: 2000 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.62 -# M3-NEXT: IPC: 0.14 -# M3-NEXT: Block RThroughput: 40.5 - -# M4-NEXT: uOps Per Cycle: 0.96 -# M4-NEXT: IPC: 0.50 -# M4-NEXT: Block RThroughput: 7.5 - -# M5-NEXT: uOps Per Cycle: 0.83 -# M5-NEXT: IPC: 0.50 -# M5-NEXT: Block RThroughput: 7.5 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp] -# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp] -# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp] -# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp] -# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], #8 -# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], #16 -# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], #16 -# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], #32 -# M3-NEXT: 4 7 3.00 * st2 { v0.s, v1.s }[0], [sp], x0 -# M3-NEXT: 4 7 3.00 * st2 { v0.2s, v1.2s }, [sp], x0 -# M3-NEXT: 4 7 3.00 * st2 { v0.d, v1.d }[0], [sp], x0 -# M3-NEXT: 6 8 4.50 * st2 { v0.2d, v1.2d }, [sp], x0 - -# M4-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp] -# M4-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp] -# M4-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp] -# M4-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp] -# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8 -# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16 -# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16 -# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32 -# M4-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0 -# M4-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0 -# M4-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0 -# M4-NEXT: 3 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0 - -# M5-NEXT: 1 2 0.50 * st2 { v0.s, v1.s }[0], [sp] -# M5-NEXT: 1 2 0.50 * st2 { v0.2s, v1.2s }, [sp] -# M5-NEXT: 1 2 0.50 * st2 { v0.d, v1.d }[0], [sp] -# M5-NEXT: 1 2 1.00 * st2 { v0.2d, v1.2d }, [sp] -# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], #8 -# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], #16 -# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], #16 -# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], #32 -# M5-NEXT: 2 2 0.50 * st2 { v0.s, v1.s }[0], [sp], x0 -# M5-NEXT: 2 2 0.50 * st2 { v0.2s, v1.2s }, [sp], x0 -# M5-NEXT: 2 2 0.50 * st2 { v0.d, v1.d }[0], [sp], x0 -# M5-NEXT: 2 2 1.00 * st2 { v0.2d, v1.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s deleted file mode 100644 index 4de5213d..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s +++ /dev/null @@ -1,97 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -st3 {v0.s, v1.s, v2.s}[0], [sp] -st3 {v0.2s, v1.2s, v2.2s}, [sp] - -st3 {v0.d, v1.d, v2.d}[0], [sp] -st3 {v0.2d, v1.2d, v2.2d}, [sp] - -st3 {v0.s, v1.s, v2.s}[0], [sp], #12 -st3 {v0.2s, v1.2s, v2.2s}, [sp], #24 - -st3 {v0.d, v1.d, v2.d}[0], [sp], #24 -st3 {v0.2d, v1.2d, v2.2d}, [sp], #48 - -st3 {v0.s, v1.s, v2.s}[0], [sp], x0 -st3 {v0.2s, v1.2s, v2.2s}, [sp], x0 - -st3 {v0.d, v1.d, v2.d}[0], [sp], x0 -st3 {v0.2d, v1.2d, v2.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1200 - -# M3-NEXT: Total Cycles: 18003 -# M3-NEXT: Total uOps: 8400 - -# M4-NEXT: Total Cycles: 3903 -# M4-NEXT: Total uOps: 5000 - -# M5-NEXT: Total Cycles: 3603 -# M5-NEXT: Total uOps: 4400 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.47 -# M3-NEXT: IPC: 0.07 -# M3-NEXT: Block RThroughput: 72.0 - -# M4-NEXT: uOps Per Cycle: 1.28 -# M4-NEXT: IPC: 0.31 -# M4-NEXT: Block RThroughput: 21.0 - -# M5-NEXT: uOps Per Cycle: 1.22 -# M5-NEXT: IPC: 0.33 -# M5-NEXT: Block RThroughput: 10.5 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp] -# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp] -# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp] -# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp] -# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12 -# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24 -# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M3-NEXT: 5 14 4.50 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0 -# M3-NEXT: 7 15 6.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M3-NEXT: 7 15 6.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0 -# M3-NEXT: 9 16 7.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0 - -# M4-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp] -# M4-NEXT: 4 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp] -# M4-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp] -# M4-NEXT: 6 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp] -# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12 -# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24 -# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M4-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0 -# M4-NEXT: 5 4 2.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M4-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0 -# M4-NEXT: 7 5 3.00 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0 - -# M5-NEXT: 2 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp] -# M5-NEXT: 3 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp] -# M5-NEXT: 2 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp] -# M5-NEXT: 5 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp] -# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], #12 -# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], #24 -# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], #24 -# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], #48 -# M5-NEXT: 3 2 1.00 * st3 { v0.s, v1.s, v2.s }[0], [sp], x0 -# M5-NEXT: 4 4 1.00 * st3 { v0.2s, v1.2s, v2.2s }, [sp], x0 -# M5-NEXT: 3 2 1.00 * st3 { v0.d, v1.d, v2.d }[0], [sp], x0 -# M5-NEXT: 6 4 1.50 * st3 { v0.2d, v1.2d, v2.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s deleted file mode 100644 index 7dfe59f..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s +++ /dev/null @@ -1,97 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp] -st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] - -st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp] -st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp] - -st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], #16 -st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], #32 - -st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], #32 -st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], #64 - -st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp], x0 -st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp], x0 - -st4 {v0.d, v1.d, v2.d, v3.d}[0], [sp], x0 -st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [sp], x0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1200 - -# M3-NEXT: Total Cycles: 18603 -# M3-NEXT: Total uOps: 9000 - -# M4-NEXT: Total Cycles: 4803 -# M4-NEXT: Total uOps: 4700 - -# M5-NEXT: Total Cycles: 4803 -# M5-NEXT: Total uOps: 4700 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.48 -# M3-NEXT: IPC: 0.06 -# M3-NEXT: Block RThroughput: 76.5 - -# M4-NEXT: uOps Per Cycle: 0.98 -# M4-NEXT: IPC: 0.25 -# M4-NEXT: Block RThroughput: 24.0 - -# M5-NEXT: uOps Per Cycle: 0.98 -# M5-NEXT: IPC: 0.25 -# M5-NEXT: Block RThroughput: 24.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] -# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] -# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 -# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 -# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M3-NEXT: 7 15 6.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 -# M3-NEXT: 7 15 6.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M3-NEXT: 7 15 6.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 -# M3-NEXT: 9 17 7.50 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M4-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] -# M4-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M4-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] -# M4-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 -# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 -# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M4-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 -# M4-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M4-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 -# M4-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 - -# M5-NEXT: 2 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp] -# M5-NEXT: 4 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# M5-NEXT: 2 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp] -# M5-NEXT: 5 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp] -# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], #16 -# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], #32 -# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], #32 -# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], #64 -# M5-NEXT: 3 2 1.00 * st4 { v0.s, v1.s, v2.s, v3.s }[0], [sp], x0 -# M5-NEXT: 5 4 2.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x0 -# M5-NEXT: 3 2 1.00 * st4 { v0.d, v1.d, v2.d, v3.d }[0], [sp], x0 -# M5-NEXT: 6 8 4.00 * st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [sp], x0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s deleted file mode 100644 index 27aa007..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/crc.s +++ /dev/null @@ -1,58 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -crc32w w0, w1, w2 -crc32w w0, w0, w3 - -crc32cx w0, w1, x2 -crc32cx w0, w0, x3 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 400 - -# M3-NEXT: Total Cycles: 204 -# M4-NEXT: Total Cycles: 404 -# M5-NEXT: Total Cycles: 204 - -# ALL-NEXT: Total uOps: 400 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 1.96 -# M3-NEXT: IPC: 1.96 -# M3-NEXT: Block RThroughput: 2.0 - -# M4-NEXT: uOps Per Cycle: 0.99 -# M4-NEXT: IPC: 0.99 -# M4-NEXT: Block RThroughput: 4.0 - -# M5-NEXT: uOps Per Cycle: 1.96 -# M5-NEXT: IPC: 1.96 -# M5-NEXT: Block RThroughput: 2.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 2 0.50 crc32w w0, w1, w2 -# M3-NEXT: 1 2 0.50 crc32w w0, w0, w3 -# M3-NEXT: 1 2 0.50 crc32cx w0, w1, x2 -# M3-NEXT: 1 2 0.50 crc32cx w0, w0, x3 - -# M4-NEXT: 1 2 1.00 crc32w w0, w1, w2 -# M4-NEXT: 1 2 1.00 crc32w w0, w0, w3 -# M4-NEXT: 1 2 1.00 crc32cx w0, w1, x2 -# M4-NEXT: 1 2 1.00 crc32cx w0, w0, x3 - -# M5-NEXT: 1 2 0.50 crc32w w0, w1, w2 -# M5-NEXT: 1 2 0.50 crc32w w0, w0, w3 -# M5-NEXT: 1 2 0.50 crc32cx w0, w1, x2 -# M5-NEXT: 1 2 0.50 crc32cx w0, w0, x3 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s index 79f810c..0819170 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/direct-branch.s @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 b main @@ -10,7 +9,6 @@ # M3-NEXT: Total Cycles: 18 # M4-NEXT: Total Cycles: 18 -# M5-NEXT: Total Cycles: 18 # ALL-NEXT: Total uOps: 100 @@ -24,11 +22,6 @@ # M4-NEXT: IPC: 5.56 # M4-NEXT: Block RThroughput: 0.2 -# M5: Dispatch Width: 6 -# M5-NEXT: uOps Per Cycle: 5.56 -# M5-NEXT: IPC: 5.56 -# M5-NEXT: Block RThroughput: 0.2 - # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -41,4 +34,3 @@ # M3-NEXT: 1 0 0.17 b main # M4-NEXT: 1 0 0.17 b main -# M5-NEXT: 1 0 0.17 b main diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s deleted file mode 100644 index c74d192..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/divide-multiply.s +++ /dev/null @@ -1,67 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 - -sdiv w0, w1, w2 -udiv x1, x2, x3 - -mul w2, w3, w4 -msub x3, x4, x5, x6 - -smull x4, w5, w6 -umulh x5, x6, x7 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 600 - -# EM3-NEXT: Total Cycles: 3305 -# EM4-NEXT: Total Cycles: 3303 -# EM5-NEXT: Total Cycles: 2603 - -# ALL-NEXT: Total uOps: 600 - -# ALL: Dispatch Width: 6 - -# EM3-NEXT: uOps Per Cycle: 0.18 -# EM3-NEXT: IPC: 0.18 -# EM3-NEXT: Block RThroughput: 33.0 - -# EM4-NEXT: uOps Per Cycle: 0.18 -# EM4-NEXT: IPC: 0.18 -# EM4-NEXT: Block RThroughput: 33.0 - -# EM5-NEXT: uOps Per Cycle: 0.23 -# EM5-NEXT: IPC: 0.23 -# EM5-NEXT: Block RThroughput: 26.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# EM3-NEXT: 1 12 12.00 sdiv w0, w1, w2 -# EM3-NEXT: 1 21 21.00 udiv x1, x2, x3 -# EM3-NEXT: 1 3 0.50 mul w2, w3, w4 -# EM3-NEXT: 1 4 1.00 msub x3, x4, x5, x6 -# EM3-NEXT: 1 3 0.50 smull x4, w5, w6 -# EM3-NEXT: 1 4 1.00 umulh x5, x6, x7 - -# EM4-NEXT: 1 12 12.00 sdiv w0, w1, w2 -# EM4-NEXT: 1 21 21.00 udiv x1, x2, x3 -# EM4-NEXT: 1 3 0.50 mul w2, w3, w4 -# EM4-NEXT: 1 4 1.00 msub x3, x4, x5, x6 -# EM4-NEXT: 1 3 0.50 smull x4, w5, w6 -# EM4-NEXT: 1 4 1.00 umulh x5, x6, x7 - -# EM5-NEXT: 1 10 10.00 sdiv w0, w1, w2 -# EM5-NEXT: 1 16 16.00 udiv x1, x2, x3 -# EM5-NEXT: 1 2 0.50 mul w2, w3, w4 -# EM5-NEXT: 1 3 1.00 msub x3, x4, x5, x6 -# EM5-NEXT: 1 2 0.50 smull x4, w5, w6 -# EM5-NEXT: 1 3 1.00 umulh x5, x6, x7 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s deleted file mode 100644 index 872f6ab7..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-recp.s +++ /dev/null @@ -1,66 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -fmov d31, #1.00000000 -fdiv d30, d31, d30 - -# Newton series for 1 / x. -frecpe d1, d0 -frecps d2, d0, d1 -fmul d1, d1, d2 -frecps d2, d0, d1 -fmul d1, d1, d2 -frecps d0, d0, d1 -fmul d0, d1, d0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 900 - -# M3-NEXT: Total Cycles: 2503 -# M4-NEXT: Total Cycles: 2403 -# M5-NEXT: Total Cycles: 2403 - -# ALL-NEXT: Total uOps: 900 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.36 -# M3-NEXT: IPC: 0.36 -# M3-NEXT: Block RThroughput: 3.3 - -# M4-NEXT: uOps Per Cycle: 0.37 -# M4-NEXT: IPC: 0.37 -# M4-NEXT: Block RThroughput: 2.3 - -# M5-NEXT: uOps Per Cycle: 0.37 -# M5-NEXT: IPC: 0.37 -# M5-NEXT: Block RThroughput: 2.3 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000 - -# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30 -# M3-NEXT: 1 4 0.50 frecpe d1, d0 - -# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30 -# M4-NEXT: 1 3 0.50 frecpe d1, d0 - -# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30 -# M5-NEXT: 1 3 0.50 frecpe d1, d0 - -# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 4 0.33 frecps d2, d0, d1 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 4 0.33 frecps d0, d0, d1 -# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s deleted file mode 100644 index 98fa404..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-rsqrt.s +++ /dev/null @@ -1,78 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -fsqrt d30, d30 -fmov d31, #1.00000000 -fdiv d30, d31, d30 - -# Newton series for 1 / sqrt(). -frsqrte d1, d0 -fmul d2, d1, d1 -frsqrts d2, d0, d2 -fmul d1, d1, d2 -fmul d2, d1, d1 -frsqrts d2, d0, d2 -fmul d1, d1, d2 -fmul d2, d1, d1 -frsqrts d0, d0, d2 -fmul d0, d1, d0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1300 - -# M3-NEXT: Total Cycles: 3703 -# M4-NEXT: Total Cycles: 3303 -# M5-NEXT: Total Cycles: 3303 - -# ALL-NEXT: Total uOps: 1300 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.35 -# M3-NEXT: IPC: 0.35 -# M3-NEXT: Block RThroughput: 26.0 - -# M4-NEXT: uOps Per Cycle: 0.39 -# M4-NEXT: IPC: 0.39 -# M4-NEXT: Block RThroughput: 3.0 - -# M5-NEXT: uOps Per Cycle: 0.39 -# M5-NEXT: IPC: 0.39 -# M5-NEXT: Block RThroughput: 3.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 25 26.00 fsqrt d30, d30 -# M4-NEXT: 1 12 2.25 fsqrt d30, d30 -# M5-NEXT: 1 12 2.25 fsqrt d30, d30 - -# ALL-NEXT: 1 1 0.33 fmov d31, #1.00000000 - -# M3-NEXT: 1 12 3.25 fdiv d30, d31, d30 -# M3-NEXT: 1 4 0.50 frsqrte d1, d0 - -# M4-NEXT: 1 12 2.25 fdiv d30, d31, d30 -# M4-NEXT: 1 3 0.50 frsqrte d1, d0 - -# M5-NEXT: 1 12 2.25 fdiv d30, d31, d30 -# M5-NEXT: 1 3 0.50 frsqrte d1, d0 - -# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 -# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 -# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 -# ALL-NEXT: 1 4 0.33 frsqrts d0, d0, d2 -# ALL-NEXT: 1 3 0.33 fmul d0, d1, d0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s deleted file mode 100644 index b9aceff..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/double-sqrt.s +++ /dev/null @@ -1,79 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -fsqrt d31, d31 - -# Newton series for sqrt(). -frsqrte d1, d0 -fmul d2, d1, d1 -frsqrts d2, d0, d2 -fmul d1, d1, d2 -fmul d2, d1, d1 -frsqrts d2, d0, d2 -fmul d1, d1, d2 -fmul d2, d1, d1 -frsqrts d2, d0, d2 -fmul d2, d2, d0 -fmul d1, d1, d2 -fcmp d0, #0.0 -fcsel d0, d0, d1, eq - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1400 - -# M3-NEXT: Total Cycles: 4203 -# M4-NEXT: Total Cycles: 4103 -# M5-NEXT: Total Cycles: 3803 - -# ALL-NEXT: Total uOps: 1500 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.36 -# M3-NEXT: IPC: 0.33 -# M3-NEXT: Block RThroughput: 27.0 - -# M4-NEXT: uOps Per Cycle: 0.37 -# M4-NEXT: IPC: 0.34 -# M4-NEXT: Block RThroughput: 3.3 - -# M5-NEXT: uOps Per Cycle: 0.39 -# M5-NEXT: IPC: 0.37 -# M5-NEXT: Block RThroughput: 3.3 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 25 26.00 fsqrt d31, d31 -# M3-NEXT: 1 4 0.50 frsqrte d1, d0 - -# M4-NEXT: 1 12 2.25 fsqrt d31, d31 -# M4-NEXT: 1 3 0.50 frsqrte d1, d0 - -# M5-NEXT: 1 12 2.25 fsqrt d31, d31 -# M5-NEXT: 1 3 0.50 frsqrte d1, d0 - -# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 -# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 -# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 3 0.33 fmul d2, d1, d1 -# ALL-NEXT: 1 4 0.33 frsqrts d2, d0, d2 -# ALL-NEXT: 1 3 0.33 fmul d2, d2, d0 -# ALL-NEXT: 1 3 0.33 fmul d1, d1, d2 -# ALL-NEXT: 1 2 1.00 fcmp d0, #0.0 - -# M3-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq -# M4-NEXT: 2 5 1.00 fcsel d0, d0, d1, eq -# M5-NEXT: 2 2 1.00 fcsel d0, d0, d1, eq diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s index 03522cd..aa14531 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/extended-register.s @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 sub w0, w1, w2, sxtb #0 add x3, x4, w5, sxth #1 @@ -17,7 +16,6 @@ # EM3-NEXT: Total Cycles: 304 # EM4-NEXT: Total Cycles: 304 -# EM5-NEXT: Total Cycles: 254 # ALL-NEXT: Total uOps: 800 @@ -31,11 +29,6 @@ # EM4-NEXT: IPC: 2.63 # EM4-NEXT: Block RThroughput: 3.0 -# EM5: Dispatch Width: 6 -# EM5-NEXT: uOps Per Cycle: 3.15 -# EM5-NEXT: IPC: 3.15 -# EM5-NEXT: Block RThroughput: 2.5 - # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -63,12 +56,3 @@ # EM4-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1 # EM4-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2 # EM4-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3 - -# EM5-NEXT: 1 1 0.17 sub w0, w1, w2, sxtb -# EM5-NEXT: 1 2 0.50 add x3, x4, w5, sxth #1 -# EM5-NEXT: 1 1 0.25 subs x6, x7, w8, uxtw #2 -# EM5-NEXT: 1 1 0.25 adds x9, x10, x11, uxtx #3 -# EM5-NEXT: 1 1 0.17 sub w12, w13, w14, uxtb -# EM5-NEXT: 1 2 0.50 add x15, x16, w17, uxth #1 -# EM5-NEXT: 1 2 0.50 subs x18, x19, w20, sxtw #2 -# EM5-NEXT: 1 2 0.50 adds x21, x22, x23, sxtx #3 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s deleted file mode 100644 index a24d8a2..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-divide-multiply.s +++ /dev/null @@ -1,94 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 - -fdiv h0, h1, h2 -fdiv s1, s2, s3 -fdiv d2, d3, d4 - -fmul h3, h4, h5 -fmul s4, s5, s6 -fmul d5, d6, d7 - -fmadd h6, h7, h8, h9 -fmadd s7, s8, s9, s10 -fmadd d8, d9, d10, d11 - -fsqrt h9, h10 -fsqrt s10, s11 -fsqrt d11, d12 - -# ALL: Iterations: 100 - -# EM3-NEXT: Instructions: 800 -# EM3-NEXT: Total Cycles: 4503 -# EM3-NEXT: Total uOps: 800 - -# EM4-NEXT: Instructions: 1200 -# EM4-NEXT: Total Cycles: 575 -# EM4-NEXT: Total uOps: 1200 - -# EM5-NEXT: Instructions: 1200 -# EM5-NEXT: Total Cycles: 433 -# EM5-NEXT: Total uOps: 1200 - -# ALL: Dispatch Width: 6 - -# EM3-NEXT: uOps Per Cycle: 0.18 -# EM3-NEXT: IPC: 0.18 -# EM3-NEXT: Block RThroughput: 45.0 - -# EM4-NEXT: uOps Per Cycle: 2.09 -# EM4-NEXT: IPC: 2.09 -# EM4-NEXT: Block RThroughput: 4.0 - -# EM5-NEXT: uOps Per Cycle: 2.77 -# EM5-NEXT: IPC: 2.77 -# EM5-NEXT: Block RThroughput: 4.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# EM3: [1] [2] [3] [4] [5] [6] Instructions: -# EM3-NEXT: 1 7 2.00 fdiv s1, s2, s3 -# EM3-NEXT: 1 12 3.25 fdiv d2, d3, d4 -# EM3-NEXT: 1 3 0.33 fmul s4, s5, s6 -# EM3-NEXT: 1 3 0.33 fmul d5, d6, d7 -# EM3-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10 -# EM3-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11 -# EM3-NEXT: 1 18 19.00 fsqrt s10, s11 -# EM3-NEXT: 1 25 26.00 fsqrt d11, d12 - -# EM4: [1] [2] [3] [4] [5] [6] Instructions: -# EM4-NEXT: 1 7 3.00 fdiv h0, h1, h2 -# EM4-NEXT: 1 7 1.50 fdiv s1, s2, s3 -# EM4-NEXT: 1 12 2.25 fdiv d2, d3, d4 -# EM4-NEXT: 1 3 0.50 fmul h3, h4, h5 -# EM4-NEXT: 1 3 0.33 fmul s4, s5, s6 -# EM4-NEXT: 1 3 0.33 fmul d5, d6, d7 -# EM4-NEXT: 1 4 0.50 fmadd h6, h7, h8, h9 -# EM4-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10 -# EM4-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11 -# EM4-NEXT: 1 7 3.00 fsqrt h9, h10 -# EM4-NEXT: 1 8 1.75 fsqrt s10, s11 -# EM4-NEXT: 1 12 2.25 fsqrt d11, d12 - -# EM5: [1] [2] [3] [4] [5] [6] Instructions: -# EM5-NEXT: 1 5 0.50 fdiv h0, h1, h2 -# EM5-NEXT: 1 7 1.00 fdiv s1, s2, s3 -# EM5-NEXT: 1 12 2.25 fdiv d2, d3, d4 -# EM5-NEXT: 1 3 0.33 fmul h3, h4, h5 -# EM5-NEXT: 1 3 0.33 fmul s4, s5, s6 -# EM5-NEXT: 1 3 0.33 fmul d5, d6, d7 -# EM5-NEXT: 1 4 0.33 fmadd h6, h7, h8, h9 -# EM5-NEXT: 1 4 0.33 fmadd s7, s8, s9, s10 -# EM5-NEXT: 1 4 0.33 fmadd d8, d9, d10, d11 -# EM5-NEXT: 1 5 0.50 fsqrt h9, h10 -# EM5-NEXT: 1 8 1.25 fsqrt s10, s11 -# EM5-NEXT: 1 12 2.25 fsqrt d11, d12 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s deleted file mode 100644 index 65aed32..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-integer.s +++ /dev/null @@ -1,114 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 - -scvtf h0, w0 -scvtf s1, w1 -scvtf d2, x2 - -fcvtzs w3, h3 -fcvtzs w4, s4 -fcvtzs x5, d5 - -fmov h6, #2.0 -fmov s7, #4.0 -fmov d8, #8.0 - -fmov h9, w9 -fmov s10, w10 -fmov d11, x11 -fmov v12.d[1], x12 - -fmov w13, h13 -fmov w14, s14 -fmov x15, d15 -fmov x16, v16.d[1] - -# ALL: Iterations: 100 - -# EM3-NEXT: Instructions: 1200 -# EM3-NEXT: Total Cycles: 405 -# EM3-NEXT: Total uOps: 1400 - -# EM4-NEXT: Instructions: 1700 -# EM4-NEXT: Total Cycles: 1108 -# EM4-NEXT: Total uOps: 1900 - -# EM5-NEXT: Instructions: 1700 -# EM5-NEXT: Total Cycles: 1407 -# EM5-NEXT: Total uOps: 1900 - -# ALL: Dispatch Width: 6 - -# EM3-NEXT: uOps Per Cycle: 3.46 -# EM3-NEXT: IPC: 2.96 -# EM3-NEXT: Block RThroughput: 4.0 - -# EM4-NEXT: uOps Per Cycle: 1.71 -# EM4-NEXT: IPC: 1.53 -# EM4-NEXT: Block RThroughput: 11.0 - -# EM5-NEXT: uOps Per Cycle: 1.35 -# EM5-NEXT: IPC: 1.21 -# EM5-NEXT: Block RThroughput: 14.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# EM3: [1] [2] [3] [4] [5] [6] Instructions: -# EM3-NEXT: 1 4 1.00 scvtf s1, w1 -# EM3-NEXT: 1 4 1.00 scvtf d2, x2 -# EM3-NEXT: 1 3 1.00 fcvtzs w4, s4 -# EM3-NEXT: 1 3 1.00 fcvtzs x5, d5 -# EM3-NEXT: 1 1 0.33 fmov s7, #4.00000000 -# EM3-NEXT: 1 1 0.33 fmov d8, #8.00000000 -# EM3-NEXT: 1 1 0.33 fmov s10, w10 -# EM3-NEXT: 1 1 0.33 fmov d11, x11 -# EM3-NEXT: 2 5 1.00 fmov v12.d[1], x12 -# EM3-NEXT: 1 1 0.33 fmov w14, s14 -# EM3-NEXT: 1 1 0.33 fmov x15, d15 -# EM3-NEXT: 2 5 1.00 fmov x16, v16.d[1] - -# EM4: [1] [2] [3] [4] [5] [6] Instructions: -# EM4-NEXT: 1 6 1.00 scvtf h0, w0 -# EM4-NEXT: 1 6 1.00 scvtf s1, w1 -# EM4-NEXT: 1 6 1.00 scvtf d2, x2 -# EM4-NEXT: 1 4 1.00 fcvtzs w3, h3 -# EM4-NEXT: 1 4 1.00 fcvtzs w4, s4 -# EM4-NEXT: 1 4 1.00 fcvtzs x5, d5 -# EM4-NEXT: 1 1 0.33 fmov h6, #2.00000000 -# EM4-NEXT: 1 1 0.33 fmov s7, #4.00000000 -# EM4-NEXT: 1 1 0.33 fmov d8, #8.00000000 -# EM4-NEXT: 1 3 1.00 fmov h9, w9 -# EM4-NEXT: 1 3 1.00 fmov s10, w10 -# EM4-NEXT: 1 3 1.00 fmov d11, x11 -# EM4-NEXT: 2 2 1.00 fmov v12.d[1], x12 -# EM4-NEXT: 1 4 1.00 fmov w13, h13 -# EM4-NEXT: 1 4 1.00 fmov w14, s14 -# EM4-NEXT: 1 4 1.00 fmov x15, d15 -# EM4-NEXT: 2 5 1.00 fmov x16, v16.d[1] - -# EM5: [1] [2] [3] [4] [5] [6] Instructions: -# EM5-NEXT: 1 6 1.00 scvtf h0, w0 -# EM5-NEXT: 1 6 1.00 scvtf s1, w1 -# EM5-NEXT: 1 6 1.00 scvtf d2, x2 -# EM5-NEXT: 1 4 1.00 fcvtzs w3, h3 -# EM5-NEXT: 1 4 1.00 fcvtzs w4, s4 -# EM5-NEXT: 1 4 1.00 fcvtzs x5, d5 -# EM5-NEXT: 1 1 0.33 fmov h6, #2.00000000 -# EM5-NEXT: 1 1 0.33 fmov s7, #4.00000000 -# EM5-NEXT: 1 1 0.33 fmov d8, #8.00000000 -# EM5-NEXT: 1 4 1.00 fmov h9, w9 -# EM5-NEXT: 1 4 1.00 fmov s10, w10 -# EM5-NEXT: 1 4 1.00 fmov d11, x11 -# EM5-NEXT: 2 6 1.00 fmov v12.d[1], x12 -# EM5-NEXT: 1 3 1.00 fmov w13, h13 -# EM5-NEXT: 1 3 1.00 fmov w14, s14 -# EM5-NEXT: 1 3 1.00 fmov x15, d15 -# EM5-NEXT: 2 5 1.00 fmov x16, v16.d[1] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s deleted file mode 100644 index 18dcf5e..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-load.s +++ /dev/null @@ -1,153 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -ldr s0, 1f -ldr q0, 1f - -ldur d0, [sp, #2] -ldur q0, [sp, #16] - -ldr b0, [sp], #1 -ldr q0, [sp], #16 - -ldr h0, [sp, #2]! -ldr q0, [sp, #16]! - -ldr s0, [sp, #4] -ldr q0, [sp, #16] - -ldr d0, [sp, x0, lsl #3] -ldr q0, [sp, x0, lsl #4] - -ldr b0, [sp, x0] -ldr q0, [sp, x0] - -ldr h0, [sp, w0, sxtw #1] -ldr q0, [sp, w0, uxtw #4] - -ldr s0, [sp, w0, sxtw] -ldr q0, [sp, w0, uxtw] - -ldp d0, d1, [sp], #16 -ldp q0, q1, [sp], #32 - -ldp s0, s1, [sp, #8]! -ldp q0, q1, [sp, #32]! - -ldp d0, d1, [sp, #16] -ldp q0, q1, [sp, #32] - -1: - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 2400 - -# M3-NEXT: Total Cycles: 4708 -# M3-NEXT: Total uOps: 3200 - -# M4-NEXT: Total Cycles: 4708 -# M4-NEXT: Total uOps: 3200 - -# M5-NEXT: Total Cycles: 5509 -# M5-NEXT: Total uOps: 3300 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.68 -# M3-NEXT: IPC: 0.51 -# M3-NEXT: Block RThroughput: 13.5 - -# M4-NEXT: uOps Per Cycle: 0.68 -# M4-NEXT: IPC: 0.51 -# M4-NEXT: Block RThroughput: 13.0 - -# M5-NEXT: uOps Per Cycle: 0.60 -# M5-NEXT: IPC: 0.44 -# M5-NEXT: Block RThroughput: 13.5 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0 -# M3-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0 -# M3-NEXT: 1 5 0.50 * ldur d0, [sp, #2] -# M3-NEXT: 1 5 0.50 * ldur q0, [sp, #16] -# M3-NEXT: 1 5 0.50 * ldr b0, [sp], #1 -# M3-NEXT: 1 5 0.50 * ldr q0, [sp], #16 -# M3-NEXT: 1 5 0.50 * ldr h0, [sp, #2]! -# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16]! -# M3-NEXT: 1 5 0.50 * ldr s0, [sp, #4] -# M3-NEXT: 1 5 0.50 * ldr q0, [sp, #16] -# M3-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3] -# M3-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4] -# M3-NEXT: 1 5 0.50 * ldr b0, [sp, x0] -# M3-NEXT: 1 5 0.50 * ldr q0, [sp, x0] -# M3-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1] -# M3-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4] -# M3-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw] -# M3-NEXT: 1 5 0.50 * ldr q0, [sp, w0, uxtw] -# M3-NEXT: 2 5 0.50 * ldp d0, d1, [sp], #16 -# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp], #32 -# M3-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]! -# M3-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]! -# M3-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16] -# M3-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32] - -# M4-NEXT: 1 5 0.50 * ldr s0, {{\.?}}Ltmp0 -# M4-NEXT: 1 5 0.50 * ldr q0, {{\.?}}Ltmp0 -# M4-NEXT: 1 5 0.50 * ldur d0, [sp, #2] -# M4-NEXT: 1 5 0.50 * ldur q0, [sp, #16] -# M4-NEXT: 1 5 0.50 * ldr b0, [sp], #1 -# M4-NEXT: 1 5 0.50 * ldr q0, [sp], #16 -# M4-NEXT: 1 5 0.50 * ldr h0, [sp, #2]! -# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16]! -# M4-NEXT: 1 5 0.50 * ldr s0, [sp, #4] -# M4-NEXT: 1 5 0.50 * ldr q0, [sp, #16] -# M4-NEXT: 1 5 0.50 * ldr d0, [sp, x0, lsl #3] -# M4-NEXT: 2 6 0.50 * ldr q0, [sp, x0, lsl #4] -# M4-NEXT: 1 5 0.50 * ldr b0, [sp, x0] -# M4-NEXT: 1 5 0.50 * ldr q0, [sp, x0] -# M4-NEXT: 2 6 0.50 * ldr h0, [sp, w0, sxtw #1] -# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw #4] -# M4-NEXT: 2 6 0.50 * ldr s0, [sp, w0, sxtw] -# M4-NEXT: 2 6 0.50 * ldr q0, [sp, w0, uxtw] -# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp], #16 -# M4-NEXT: 2 5 0.50 * ldp q0, q1, [sp], #32 -# M4-NEXT: 2 5 0.50 * ldp s0, s1, [sp, #8]! -# M4-NEXT: 2 5 1.00 * ldp q0, q1, [sp, #32]! -# M4-NEXT: 1 5 0.50 * ldp d0, d1, [sp, #16] -# M4-NEXT: 1 5 1.00 * ldp q0, q1, [sp, #32] - -# M5-NEXT: 1 6 0.50 * ldr s0, {{\.?}}Ltmp0 -# M5-NEXT: 1 6 0.50 * ldr q0, {{\.?}}Ltmp0 -# M5-NEXT: 1 6 0.50 * ldur d0, [sp, #2] -# M5-NEXT: 1 6 0.50 * ldur q0, [sp, #16] -# M5-NEXT: 1 6 0.50 * ldr b0, [sp], #1 -# M5-NEXT: 1 6 0.50 * ldr q0, [sp], #16 -# M5-NEXT: 1 6 0.50 * ldr h0, [sp, #2]! -# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16]! -# M5-NEXT: 1 6 0.50 * ldr s0, [sp, #4] -# M5-NEXT: 1 6 0.50 * ldr q0, [sp, #16] -# M5-NEXT: 1 6 0.50 * ldr d0, [sp, x0, lsl #3] -# M5-NEXT: 2 7 0.50 * ldr q0, [sp, x0, lsl #4] -# M5-NEXT: 1 6 0.50 * ldr b0, [sp, x0] -# M5-NEXT: 1 6 0.50 * ldr q0, [sp, x0] -# M5-NEXT: 2 7 0.50 * ldr h0, [sp, w0, sxtw #1] -# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw #4] -# M5-NEXT: 2 7 0.50 * ldr s0, [sp, w0, sxtw] -# M5-NEXT: 2 7 0.50 * ldr q0, [sp, w0, uxtw] -# M5-NEXT: 2 6 0.50 * ldp d0, d1, [sp], #16 -# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp], #32 -# M5-NEXT: 2 6 0.50 * ldp s0, s1, [sp, #8]! -# M5-NEXT: 2 6 1.00 * ldp q0, q1, [sp, #32]! -# M5-NEXT: 1 6 0.50 * ldp d0, d1, [sp, #16] -# M5-NEXT: 1 6 1.00 * ldp q0, q1, [sp, #32] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s deleted file mode 100644 index 05245ad..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-recp.s +++ /dev/null @@ -1,62 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -fmov s31, #1.00000000 -fdiv s30, s31, s30 - -# Newton series for 1 / x. -frecpe s1, s0 -frecps s2, s0, s1 -fmul s1, s1, s2 -frecps s0, s0, s1 -fmul s0, s1, s0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 700 - -# M3-NEXT: Total Cycles: 1803 -# M4-NEXT: Total Cycles: 1703 -# M5-NEXT: Total Cycles: 1703 - -# ALL-NEXT: Total uOps: 700 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.39 -# M3-NEXT: IPC: 0.39 -# M3-NEXT: Block RThroughput: 2.0 - -# M4-NEXT: uOps Per Cycle: 0.41 -# M4-NEXT: IPC: 0.41 -# M4-NEXT: Block RThroughput: 1.5 - -# M5-NEXT: uOps Per Cycle: 0.41 -# M5-NEXT: IPC: 0.41 -# M5-NEXT: Block RThroughput: 1.3 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000 - -# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30 -# M3-NEXT: 1 4 0.50 frecpe s1, s0 - -# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30 -# M4-NEXT: 1 3 0.50 frecpe s1, s0 - -# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30 -# M5-NEXT: 1 3 0.50 frecpe s1, s0 - -# ALL-NEXT: 1 4 0.33 frecps s2, s0, s1 -# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 -# ALL-NEXT: 1 4 0.33 frecps s0, s0, s1 -# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s deleted file mode 100644 index fd82cc3..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-rsqrt.s +++ /dev/null @@ -1,72 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -fsqrt s30, s30 -fmov s31, #1.00000000 -fdiv s30, s31, s30 - -# Newton series for 1 / sqrtf(). -frsqrte s1, s0 -fmul s2, s1, s1 -frsqrts s2, s0, s2 -fmul s1, s1, s2 -fmul s2, s1, s1 -frsqrts s0, s0, s2 -fmul s0, s1, s0 - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1000 - -# M3-NEXT: Total Cycles: 2503 -# M4-NEXT: Total Cycles: 2303 -# M5-NEXT: Total Cycles: 2303 - -# ALL-NEXT: Total uOps: 1000 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.40 -# M3-NEXT: IPC: 0.40 -# M3-NEXT: Block RThroughput: 19.0 - -# M4-NEXT: uOps Per Cycle: 0.43 -# M4-NEXT: IPC: 0.43 -# M4-NEXT: Block RThroughput: 2.0 - -# M5-NEXT: uOps Per Cycle: 0.43 -# M5-NEXT: IPC: 0.43 -# M5-NEXT: Block RThroughput: 2.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 18 19.00 fsqrt s30, s30 -# M4-NEXT: 1 8 1.75 fsqrt s30, s30 -# M5-NEXT: 1 8 1.25 fsqrt s30, s30 - -# ALL-NEXT: 1 1 0.33 fmov s31, #1.00000000 - -# M3-NEXT: 1 7 2.00 fdiv s30, s31, s30 -# M3-NEXT: 1 4 0.50 frsqrte s1, s0 - -# M4-NEXT: 1 7 1.50 fdiv s30, s31, s30 -# M4-NEXT: 1 3 0.50 frsqrte s1, s0 - -# M5-NEXT: 1 7 1.00 fdiv s30, s31, s30 -# M5-NEXT: 1 3 0.50 frsqrte s1, s0 - -# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 -# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2 -# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 -# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 -# ALL-NEXT: 1 4 0.33 frsqrts s0, s0, s2 -# ALL-NEXT: 1 3 0.33 fmul s0, s1, s0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s deleted file mode 100644 index 423fae2..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-sqrt.s +++ /dev/null @@ -1,73 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -fsqrt s31, s31 - -# Newton series for sqrtf(). -frsqrte s1, s0 -fmul s2, s1, s1 -frsqrts s2, s0, s2 -fmul s1, s1, s2 -fmul s2, s1, s1 -frsqrts s2, s0, s2 -fmul s2, s2, s0 -fmul s1, s1, s2 -fcmp s0, #0.0 -fcsel s0, s0, s1, eq - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1100 - -# M3-NEXT: Total Cycles: 3203 -# M4-NEXT: Total Cycles: 3103 -# M5-NEXT: Total Cycles: 2803 - -# ALL-NEXT: Total uOps: 1200 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.37 -# M3-NEXT: IPC: 0.34 -# M3-NEXT: Block RThroughput: 20.0 - -# M4-NEXT: uOps Per Cycle: 0.39 -# M4-NEXT: IPC: 0.35 -# M4-NEXT: Block RThroughput: 2.3 - -# M5-NEXT: uOps Per Cycle: 0.43 -# M5-NEXT: IPC: 0.39 -# M5-NEXT: Block RThroughput: 2.3 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 18 19.00 fsqrt s31, s31 -# M3-NEXT: 1 4 0.50 frsqrte s1, s0 - -# M4-NEXT: 1 8 1.75 fsqrt s31, s31 -# M4-NEXT: 1 3 0.50 frsqrte s1, s0 - -# M5-NEXT: 1 8 1.25 fsqrt s31, s31 -# M5-NEXT: 1 3 0.50 frsqrte s1, s0 - -# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 -# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2 -# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 -# ALL-NEXT: 1 3 0.33 fmul s2, s1, s1 -# ALL-NEXT: 1 4 0.33 frsqrts s2, s0, s2 -# ALL-NEXT: 1 3 0.33 fmul s2, s2, s0 -# ALL-NEXT: 1 3 0.33 fmul s1, s1, s2 -# ALL-NEXT: 1 2 1.00 fcmp s0, #0.0 - -# M3-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq -# M4-NEXT: 2 5 1.00 fcsel s0, s0, s1, eq -# M5-NEXT: 2 2 1.00 fcsel s0, s0, s1, eq diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s deleted file mode 100644 index 55d1d60..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s +++ /dev/null @@ -1,142 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -stur d0, [sp, #2] -stur q0, [sp, #16] - -str b0, [sp], #1 -str q0, [sp], #16 - -str h0, [sp, #2]! -str q0, [sp, #16]! - -str s0, [sp, #4] -str q0, [sp, #16] - -str d0, [sp, x0, lsl #3] -str q0, [sp, x0, lsl #4] - -str b0, [sp, x0] -str q0, [sp, x0] - -str h0, [sp, w0, sxtw #1] -str q0, [sp, w0, uxtw #4] - -str s0, [sp, w0, sxtw] -str q0, [sp, w0, uxtw] - -stp d0, d1, [sp], #16 -stp q0, q1, [sp], #32 - -stp s0, s1, [sp, #8]! -stp q0, q1, [sp, #32]! - -stp d0, d1, [sp, #16] -stp q0, q1, [sp, #32] - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 2200 - -# M3-NEXT: Total Cycles: 3203 -# M3-NEXT: Total uOps: 2900 - -# M4-NEXT: Total Cycles: 3203 -# M4-NEXT: Total uOps: 3000 - -# M5-NEXT: Total Cycles: 2803 -# M5-NEXT: Total uOps: 2500 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.91 -# M3-NEXT: IPC: 0.69 -# M3-NEXT: Block RThroughput: 22.0 - -# M4-NEXT: uOps Per Cycle: 0.94 -# M4-NEXT: IPC: 0.69 -# M4-NEXT: Block RThroughput: 12.5 - -# M5-NEXT: uOps Per Cycle: 0.89 -# M5-NEXT: IPC: 0.78 -# M5-NEXT: Block RThroughput: 11.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 1 1.00 * stur d0, [sp, #2] -# M3-NEXT: 1 1 1.00 * stur q0, [sp, #16] -# M3-NEXT: 1 1 1.00 * str b0, [sp], #1 -# M3-NEXT: 1 1 1.00 * str q0, [sp], #16 -# M3-NEXT: 1 1 1.00 * str h0, [sp, #2]! -# M3-NEXT: 1 1 1.00 * str q0, [sp, #16]! -# M3-NEXT: 1 1 1.00 * str s0, [sp, #4] -# M3-NEXT: 1 1 1.00 * str q0, [sp, #16] -# M3-NEXT: 1 1 1.00 * str d0, [sp, x0, lsl #3] -# M3-NEXT: 2 3 1.00 * str q0, [sp, x0, lsl #4] -# M3-NEXT: 1 1 1.00 * str b0, [sp, x0] -# M3-NEXT: 1 1 1.00 * str q0, [sp, x0] -# M3-NEXT: 2 3 1.00 * str h0, [sp, w0, sxtw #1] -# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw #4] -# M3-NEXT: 2 3 1.00 * str s0, [sp, w0, sxtw] -# M3-NEXT: 2 3 1.00 * str q0, [sp, w0, uxtw] -# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp], #16 -# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32 -# M3-NEXT: 1 1 1.00 * stp s0, s1, [sp, #8]! -# M3-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]! -# M3-NEXT: 1 1 1.00 * stp d0, d1, [sp, #16] -# M3-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32] - -# M4-NEXT: 1 1 0.50 * stur d0, [sp, #2] -# M4-NEXT: 1 1 0.50 * stur q0, [sp, #16] -# M4-NEXT: 1 1 0.50 * str b0, [sp], #1 -# M4-NEXT: 1 1 0.50 * str q0, [sp], #16 -# M4-NEXT: 1 1 0.50 * str h0, [sp, #2]! -# M4-NEXT: 1 1 0.50 * str q0, [sp, #16]! -# M4-NEXT: 1 1 0.50 * str s0, [sp, #4] -# M4-NEXT: 1 1 0.50 * str q0, [sp, #16] -# M4-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3] -# M4-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4] -# M4-NEXT: 1 1 0.50 * str b0, [sp, x0] -# M4-NEXT: 1 1 0.50 * str q0, [sp, x0] -# M4-NEXT: 2 3 0.50 * str h0, [sp, w0, sxtw #1] -# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4] -# M4-NEXT: 2 3 0.50 * str s0, [sp, w0, sxtw] -# M4-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw] -# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16 -# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp], #32 -# M4-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]! -# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32]! -# M4-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16] -# M4-NEXT: 2 1 1.00 * stp q0, q1, [sp, #32] - -# M5-NEXT: 1 1 0.50 * stur d0, [sp, #2] -# M5-NEXT: 1 1 0.50 * stur q0, [sp, #16] -# M5-NEXT: 1 1 0.50 * str b0, [sp], #1 -# M5-NEXT: 1 1 0.50 * str q0, [sp], #16 -# M5-NEXT: 1 1 0.50 * str h0, [sp, #2]! -# M5-NEXT: 1 1 0.50 * str q0, [sp, #16]! -# M5-NEXT: 1 1 0.50 * str s0, [sp, #4] -# M5-NEXT: 1 1 0.50 * str q0, [sp, #16] -# M5-NEXT: 1 1 0.50 * str d0, [sp, x0, lsl #3] -# M5-NEXT: 2 3 0.50 * str q0, [sp, x0, lsl #4] -# M5-NEXT: 1 1 0.50 * str b0, [sp, x0] -# M5-NEXT: 1 1 0.50 * str q0, [sp, x0] -# M5-NEXT: 1 1 0.50 * str h0, [sp, w0, sxtw #1] -# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw #4] -# M5-NEXT: 1 1 0.50 * str s0, [sp, w0, sxtw] -# M5-NEXT: 2 3 0.50 * str q0, [sp, w0, uxtw] -# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp], #16 -# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp], #32 -# M5-NEXT: 1 1 0.50 * stp s0, s1, [sp, #8]! -# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32]! -# M5-NEXT: 1 1 0.50 * stp d0, d1, [sp, #16] -# M5-NEXT: 1 1 1.00 * stp q0, q1, [sp, #32] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s deleted file mode 100644 index 04f30d3..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/load.s +++ /dev/null @@ -1,66 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -ldr w0, 1f -ldur x0, [sp, #8] -ldrb w0, [sp], #1 -ldrsh w0, [sp, #2]! -ldr x0, [sp, #8] -ldrb w0, [sp, x31] -ldrsh w0, [sp, x31, lsl #1] -ldr w0, [sp, w31, sxtw] -ldr x0, [sp, w31, uxtw #3] -ldnp w0, w1, [sp, #8] -ldp x0, x1, [sp], #16 -ldpsw x0, x1, [sp, #8]! - -1: - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1200 -# ALL-NEXT: Total Cycles: 1904 - -# M3-NEXT: Total uOps: 1600 -# M4-NEXT: Total uOps: 1400 -# M5-NEXT: Total uOps: 1400 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 0.84 -# M4-NEXT: uOps Per Cycle: 0.74 -# M5-NEXT: uOps Per Cycle: 0.74 - -# ALL-NEXT: IPC: 0.63 -# ALL-NEXT: Block RThroughput: 6.0 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 4 0.50 * ldr w0, {{\.?}}Ltmp0 -# ALL-NEXT: 1 4 0.50 * ldur x0, [sp, #8] -# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp], #1 -# ALL-NEXT: 1 4 0.50 * ldrsh w0, [sp, #2]! -# ALL-NEXT: 1 4 0.50 * ldr x0, [sp, #8] -# ALL-NEXT: 1 4 0.50 * ldrb w0, [sp, xzr] -# ALL-NEXT: 1 5 0.50 * ldrsh w0, [sp, xzr, lsl #1] - -# M3-NEXT: 2 5 0.50 * ldr w0, [sp, wzr, sxtw] -# M3-NEXT: 2 5 0.50 * ldr x0, [sp, wzr, uxtw #3] - -# M4-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw] -# M4-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3] - -# M5-NEXT: 1 5 0.50 * ldr w0, [sp, wzr, sxtw] -# M5-NEXT: 1 5 0.50 * ldr x0, [sp, wzr, uxtw #3] - -# ALL-NEXT: 1 4 0.50 * ldnp w0, w1, [sp, #8] -# ALL-NEXT: 2 4 0.50 * ldp x0, x1, [sp], #16 -# ALL-NEXT: 2 4 0.50 * ldpsw x0, x1, [sp, #8]! diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s index b3bbec5..9e8c071 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefixes=ALL,M5 b main @@ -20,11 +19,6 @@ # M4-NEXT: IPC: 0.50 # M4-NEXT: Block RThroughput: 0.2 -# M5: Dispatch Width: 6 -# M5-NEXT: uOps Per Cycle: 0.50 -# M5-NEXT: IPC: 0.50 -# M5-NEXT: Block RThroughput: 0.2 - # ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] # ALL-NEXT: 0, 1 (50.0%) diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s index 8d885f4..6a1c81b 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/shifted-register.s @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,EM5 adds w0, w1, w2, lsl #0 sub x3, x4, x5, lsr #1 @@ -10,14 +9,13 @@ adds w12, w13, w14, lsl #4 sub x15, x16, x17, lsr #6 ands x18, x19, x20, lsl #8 - eor w21, w22, w23, asr #10 + orr w21, w22, w23, asr #10 # ALL: Iterations: 100 # ALL-NEXT: Instructions: 800 # EM3-NEXT: Total Cycles: 354 # EM4-NEXT: Total Cycles: 329 -# EM5-NEXT: Total Cycles: 220 # ALL-NEXT: Total uOps: 800 @@ -31,11 +29,6 @@ # EM4-NEXT: IPC: 2.43 # EM4-NEXT: Block RThroughput: 3.3 -# EM5: Dispatch Width: 6 -# EM5-NEXT: uOps Per Cycle: 3.64 -# EM5-NEXT: IPC: 3.64 -# EM5-NEXT: Block RThroughput: 1.5 - # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -53,7 +46,7 @@ # EM3-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4 # EM3-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 # EM3-NEXT: 1 2 0.50 ands x18, x19, x20, lsl #8 -# EM3-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10 +# EM3-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10 # EM4-NEXT: 1 1 0.25 adds w0, w1, w2 # EM4-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1 @@ -62,13 +55,4 @@ # EM4-NEXT: 1 2 0.50 adds w12, w13, w14, lsl #4 # EM4-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 # EM4-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8 -# EM4-NEXT: 1 2 0.50 eor w21, w22, w23, asr #10 - -# EM5-NEXT: 1 1 0.17 adds w0, w1, w2 -# EM5-NEXT: 1 2 0.50 sub x3, x4, x5, lsr #1 -# EM5-NEXT: 1 1 0.25 ands x6, x7, x8, lsl #2 -# EM5-NEXT: 1 2 0.33 orr w9, w10, w11, asr #3 -# EM5-NEXT: 1 2 0.33 adds w12, w13, w14, lsl #4 -# EM5-NEXT: 1 2 0.50 sub x15, x16, x17, lsr #6 -# EM5-NEXT: 1 1 0.25 ands x18, x19, x20, lsl #8 -# EM5-NEXT: 1 2 0.33 eor w21, w22, w23, asr #10 +# EM4-NEXT: 1 2 0.50 orr w21, w22, w23, asr #10 diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s deleted file mode 100644 index b86cdac..0000000 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s +++ /dev/null @@ -1,82 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 - -stur x0, [sp, #8] -strb w0, [sp], #1 -strh w0, [sp, #2]! -str x0, [sp, #8] -strb w0, [sp, x31] -strh w0, [sp, x31, lsl #1] -str w0, [sp, w31, sxtw] -str x0, [sp, w31, uxtw #3] -stnp w0, w1, [sp, #8] -stp x0, x1, [sp], #16 -stp w0, w1, [sp, #8]! - -# ALL: Iterations: 100 -# ALL-NEXT: Instructions: 1100 -# ALL-NEXT: Total Cycles: 1303 - -# M3-NEXT: Total uOps: 1300 -# M4-NEXT: Total uOps: 1100 -# M5-NEXT: Total uOps: 1100 - -# ALL: Dispatch Width: 6 - -# M3-NEXT: uOps Per Cycle: 1.00 -# M4-NEXT: uOps Per Cycle: 0.84 -# M5-NEXT: uOps Per Cycle: 0.84 - -# ALL-NEXT: IPC: 0.84 - -# M3-NEXT: Block RThroughput: 11.0 -# M4-NEXT: Block RThroughput: 5.5 -# M5-NEXT: Block RThroughput: 5.5 - -# ALL: Instruction Info: -# ALL-NEXT: [1]: #uOps -# ALL-NEXT: [2]: Latency -# ALL-NEXT: [3]: RThroughput -# ALL-NEXT: [4]: MayLoad -# ALL-NEXT: [5]: MayStore -# ALL-NEXT: [6]: HasSideEffects (U) - -# ALL: [1] [2] [3] [4] [5] [6] Instructions: - -# M3-NEXT: 1 1 1.00 * stur x0, [sp, #8] -# M3-NEXT: 1 1 1.00 * strb w0, [sp], #1 -# M3-NEXT: 1 1 1.00 * strh w0, [sp, #2]! -# M3-NEXT: 1 1 1.00 * str x0, [sp, #8] -# M3-NEXT: 1 1 1.00 * strb w0, [sp, xzr] -# M3-NEXT: 1 1 1.00 * strh w0, [sp, xzr, lsl #1] -# M3-NEXT: 2 2 1.00 * str w0, [sp, wzr, sxtw] -# M3-NEXT: 2 2 1.00 * str x0, [sp, wzr, uxtw #3] -# M3-NEXT: 1 1 1.00 * stnp w0, w1, [sp, #8] -# M3-NEXT: 1 1 1.00 * stp x0, x1, [sp], #16 -# M3-NEXT: 1 1 1.00 * stp w0, w1, [sp, #8]! - -# M4-NEXT: 1 1 0.50 * stur x0, [sp, #8] -# M4-NEXT: 1 1 0.50 * strb w0, [sp], #1 -# M4-NEXT: 1 1 0.50 * strh w0, [sp, #2]! -# M4-NEXT: 1 1 0.50 * str x0, [sp, #8] -# M4-NEXT: 1 1 0.50 * strb w0, [sp, xzr] -# M4-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1] -# M4-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw] -# M4-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3] -# M4-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8] -# M4-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16 -# M4-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]! - -# M5-NEXT: 1 1 0.50 * stur x0, [sp, #8] -# M5-NEXT: 1 1 0.50 * strb w0, [sp], #1 -# M5-NEXT: 1 1 0.50 * strh w0, [sp, #2]! -# M5-NEXT: 1 1 0.50 * str x0, [sp, #8] -# M5-NEXT: 1 1 0.50 * strb w0, [sp, xzr] -# M5-NEXT: 1 1 0.50 * strh w0, [sp, xzr, lsl #1] -# M5-NEXT: 1 2 0.50 * str w0, [sp, wzr, sxtw] -# M5-NEXT: 1 2 0.50 * str x0, [sp, wzr, uxtw #3] -# M5-NEXT: 1 1 0.50 * stnp w0, w1, [sp, #8] -# M5-NEXT: 1 1 0.50 * stp x0, x1, [sp], #16 -# M5-NEXT: 1 1 0.50 * stp w0, w1, [sp, #8]! diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s index 3fecb1e..a4229110 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/zero-latency-move.s @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 # RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 mov x0, x1 mov sp, x0 @@ -23,13 +22,21 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 1000 -# ALL-NEXT: Total Cycles: 172 + +# M3-NEXT: Total Cycles: 172 +# M4-NEXT: Total Cycles: 172 + # ALL-NEXT: Total uOps: 1000 -# ALL: Dispatch Width: 6 -# ALL-NEXT: uOps Per Cycle: 5.81 -# ALL-NEXT: IPC: 5.81 -# ALL-NEXT: Block RThroughput: 1.7 +# M3: Dispatch Width: 6 +# M3-NEXT: uOps Per Cycle: 5.81 +# M3-NEXT: IPC: 5.81 +# M3-NEXT: Block RThroughput: 1.7 + +# M4: Dispatch Width: 6 +# M4-NEXT: uOps Per Cycle: 5.81 +# M4-NEXT: IPC: 5.81 +# M4-NEXT: Block RThroughput: 1.7 # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps @@ -40,21 +47,25 @@ # ALL-NEXT: [6]: HasSideEffects (U) # ALL: [1] [2] [3] [4] [5] [6] Instructions: -# ALL-NEXT: 1 0 0.17 mov x0, x1 -# ALL-NEXT: 1 0 0.17 mov sp, x0 -# ALL-NEXT: 1 0 0.17 mov w0, #12816 +# M3-NEXT: 1 0 0.17 mov x0, x1 +# M3-NEXT: 1 0 0.17 mov sp, x0 +# M3-NEXT: 1 0 0.17 mov w0, #12816 # M3-NEXT: 1 1 0.25 add w0, w1, #0 -# M4-NEXT: 1 1 0.25 add w0, w1, #0 -# M5-NEXT: 1 1 0.17 add w0, w1, #0 - -# ALL-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 -# ALL-NEXT: 1 4 0.50 * ldr x0, [x0] -# ALL-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 -# ALL-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 - +# M3-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 +# M3-NEXT: 1 4 0.50 * ldr x0, [x0] +# M3-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 +# M3-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 # M3-NEXT: 1 1 0.33 fmov s0, s1 -# M4-NEXT: 1 1 0.33 fmov s0, s1 -# M5-NEXT: 1 2 0.33 fmov s0, s1 +# M3-NEXT: 1 0 0.17 movi d0, #0000000000000000 -# ALL-NEXT: 1 0 0.17 movi d0, #0000000000000000 +# M4-NEXT: 1 0 0.17 mov x0, x1 +# M4-NEXT: 1 0 0.17 mov sp, x0 +# M4-NEXT: 1 0 0.17 mov w0, #12816 +# M4-NEXT: 1 1 0.25 add w0, w1, #0 +# M4-NEXT: 1 0 0.17 adr x0, {{\.?}}Ltmp0 +# M4-NEXT: 1 4 0.50 * ldr x0, [x0] +# M4-NEXT: 1 0 0.17 adrp x0, {{\.?}}Ltmp0 +# M4-NEXT: 1 1 0.25 add x0, x0, :lo12:{{\.?}}Ltmp0 +# M4-NEXT: 1 1 0.33 fmov s0, s1 +# M4-NEXT: 1 0 0.17 movi d0, #0000000000000000 -- 2.7.4