perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
author Kan Liang <kan.liang@linux.intel.com>
Thu, 12 Jan 2023 20:01:05 +0000 (12:01 -0800)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 20 Jan 2023 23:06:13 +0000 (00:06 +0100)
The following kernel warning is triggered when SPR MCC is used.

[   17.945331] ------------[ cut here ]------------
[   17.946305] WARNING: CPU: 65 PID: 1 at
arch/x86/events/intel/uncore_discovery.c:184
intel_uncore_has_discovery_tables+0x4c0/0x65c
[   17.946305] Modules linked in:
[   17.946305] CPU: 65 PID: 1 Comm: swapper/0 Not tainted
5.4.17-2136.313.1-X10-2c+ #4

It's caused by the broken discovery table of UPI.

The discovery tables come from hardware. Apart from dropping the broken
information, there is nothing Linux can do. Using WARN_ON_ONCE() is
overkill.

Replace WARN_ON_ONCE() with pr_info(), and report which uncore unit is
dropped and why.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Michael Petlan <mpetlan@redhat.com>
Link: https://lore.kernel.org/r/20230112200105.733466-6-kan.liang@linux.intel.com
arch/x86/events/intel/uncore_discovery.c

index abb5119..cb488e4 100644 (file)
@@ -128,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
        unsigned int *box_offset, *ids;
        int i;
 
-       if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset))
+       if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
+               pr_info("Invalid address is detected for uncore type %d box %d, "
+                       "Disable the uncore unit.\n",
+                       unit->box_type, unit->box_id);
                return;
+       }
 
        if (parsed) {
                type = search_uncore_discovery_type(unit->box_type);
-               if (WARN_ON_ONCE(!type))
+               if (!type) {
+                       pr_info("A spurious uncore type %d is detected, "
+                               "Disable the uncore type.\n",
+                               unit->box_type);
                        return;
+               }
                /* Store the first box of each die */
                if (!type->box_ctrl_die[die])
                        type->box_ctrl_die[die] = unit->ctl;
@@ -169,8 +177,12 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
                ids[i] = type->ids[i];
                box_offset[i] = type->box_offset[i];
 
-               if (WARN_ON_ONCE(unit->box_id == ids[i]))
+               if (unit->box_id == ids[i]) {
+                       pr_info("Duplicate uncore type %d box ID %d is detected, "
+                               "Drop the duplicate uncore unit.\n",
+                               unit->box_type, unit->box_id);
                        goto free_ids;
+               }
        }
        ids[i] = unit->box_id;
        box_offset[i] = unit->ctl - type->box_ctrl;