def HWVALU : ProcResource<1> {
let BufferSize = 1;
}
+def HWTransVALU : ProcResource<1> { // Transcendental VALU
+ let BufferSize = 1;
+}
def HWRC : ProcResource<1> { // Register destination cache
let BufferSize = 1;
}
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
-def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
-def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 24>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 24>;
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
# CHECK-NEXT: [2] - HWLGKM
# CHECK-NEXT: [3] - HWRC
# CHECK-NEXT: [4] - HWSALU
-# CHECK-NEXT: [5] - HWVALU
-# CHECK-NEXT: [6] - HWVMEM
+# CHECK-NEXT: [5] - HWTransVALU
+# CHECK-NEXT: [6] - HWVALU
+# CHECK-NEXT: [7] - HWVMEM
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6]
-# CHECK-NEXT: - - - 3.00 - 3.00 -
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 3.00 - - 3.00 -
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v0, v0, v0
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v1, v1, v1
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v2, v1, v0
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f32_e32 v0, v0, v0
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f32_e32 v1, v1, v1
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f32_e32 v2, v1, v0
# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: [2] - HWLGKM
# CHECK-NEXT: [3] - HWRC
# CHECK-NEXT: [4] - HWSALU
-# CHECK-NEXT: [5] - HWVALU
-# CHECK-NEXT: [6] - HWVMEM
+# CHECK-NEXT: [5] - HWTransVALU
+# CHECK-NEXT: [6] - HWVALU
+# CHECK-NEXT: [7] - HWVMEM
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6]
-# CHECK-NEXT: - - - 29.00 1.00 28.00 -
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 29.00 1.00 3.00 28.00 -
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_fract_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_floor_f64_e32 v[6:7], v[6:7]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0
-# CHECK-NEXT: - - - 2.00 1.00 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_rsq_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: - - - 1.00 - 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_fract_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_floor_f64_e32 v[6:7], v[6:7]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0
+# CHECK-NEXT: - - - 2.00 1.00 - 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5]
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789 0
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx1010 --timeline --iterations=1 < %s | FileCheck %s
+
+v_log_f32 v0, v0
+v_rcp_f32 v0, v0
+v_rsq_f32 v1, v1
+v_sqrt_f32 v2, v0
+v_rcp_f64 v[0:1], v[0:1]
+v_rsq_f64 v[1:2], v[1:2]
+v_sqrt_f64 v[2:3], v[0:1]
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 7
+# CHECK-NEXT: Total Cycles: 94
+# CHECK-NEXT: Total uOps: 7
+
+# CHECK: Dispatch Width: 1
+# CHECK-NEXT: uOps Per Cycle: 0.07
+# CHECK-NEXT: IPC: 0.07
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 10 1.00 U v_log_f32_e32 v0, v0
+# CHECK-NEXT: 1 10 1.00 U v_rcp_f32_e32 v0, v0
+# CHECK-NEXT: 1 10 1.00 U v_rsq_f32_e32 v1, v1
+# CHECK-NEXT: 1 10 1.00 U v_sqrt_f32_e32 v2, v0
+# CHECK-NEXT: 1 24 1.00 U v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: 1 24 1.00 U v_rsq_f64_e32 v[1:2], v[1:2]
+# CHECK-NEXT: 1 24 1.00 U v_sqrt_f64_e32 v[2:3], v[0:1]
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWBranch
+# CHECK-NEXT: [1] - HWExport
+# CHECK-NEXT: [2] - HWLGKM
+# CHECK-NEXT: [3] - HWRC
+# CHECK-NEXT: [4] - HWSALU
+# CHECK-NEXT: [5] - HWTransVALU
+# CHECK-NEXT: [6] - HWVALU
+# CHECK-NEXT: [7] - HWVMEM
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
+# CHECK-NEXT: - - - 7.00 - 7.00 3.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_log_f32_e32 v0, v0
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_rcp_f32_e32 v0, v0
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_rsq_f32_e32 v1, v1
+# CHECK-NEXT: - - - 1.00 - 1.00 - - v_sqrt_f32_e32 v2, v0
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[1:2], v[1:2]
+# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[2:3], v[0:1]
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeE . . . . . . . . . . . . v_log_f32_e32 v0, v0
+# CHECK-NEXT: [0,1] . . DeeeeeeeeeE . . . . . . . . . . v_rcp_f32_e32 v0, v0
+# CHECK-NEXT: [0,2] . . .DeeeeeeeeeE . . . . . . . . . . v_rsq_f32_e32 v1, v1
+# CHECK-NEXT: [0,3] . . . . DeeeeeeeeeE . . . . . . . . v_sqrt_f32_e32 v2, v0
+# CHECK-NEXT: [0,4] . . . . .DeeeeeeeeeeeeeeeeeeeeeeeE . . . . . v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: [0,5] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeE v_rsq_f64_e32 v[1:2], v[1:2]
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 0.0 0.0 0.0 v_log_f32_e32 v0, v0
+# CHECK-NEXT: 1. 1 0.0 0.0 0.0 v_rcp_f32_e32 v0, v0
+# CHECK-NEXT: 2. 1 0.0 0.0 0.0 v_rsq_f32_e32 v1, v1
+# CHECK-NEXT: 3. 1 0.0 0.0 0.0 v_sqrt_f32_e32 v2, v0
+# CHECK-NEXT: 4. 1 0.0 0.0 0.0 v_rcp_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: 5. 1 0.0 0.0 0.0 v_rsq_f64_e32 v[1:2], v[1:2]
+# CHECK-NEXT: 6. 1 0.0 0.0 0.0 v_sqrt_f64_e32 v[2:3], v[0:1]
+# CHECK-NEXT: 1 0.0 0.0 0.0 <total>