iommu/vt-d: Fix an IOMMU perfmon warning when CPU hotplug
authorKan Liang <kan.liang@linux.intel.com>
Wed, 29 Mar 2023 13:47:21 +0000 (21:47 +0800)
committerJoerg Roedel <jroedel@suse.de>
Fri, 31 Mar 2023 08:06:16 +0000 (10:06 +0200)
A warning can be triggered when hotplug CPU 0.
$ echo 0 > /sys/devices/system/cpu/cpu0/online

 ------------[ cut here ]------------
 Voluntary context switch within RCU read-side critical section!
 WARNING: CPU: 0 PID: 19 at kernel/rcu/tree_plugin.h:318
          rcu_note_context_switch+0x4f4/0x580
 RIP: 0010:rcu_note_context_switch+0x4f4/0x580
 Call Trace:
  <TASK>
  ? perf_event_update_userpage+0x104/0x150
  __schedule+0x8d/0x960
  ? perf_event_set_state.part.82+0x11/0x50
  schedule+0x44/0xb0
  schedule_timeout+0x226/0x310
  ? __perf_event_disable+0x64/0x1a0
  ? _raw_spin_unlock+0x14/0x30
  wait_for_completion+0x94/0x130
  __wait_rcu_gp+0x108/0x130
  synchronize_rcu+0x67/0x70
  ? invoke_rcu_core+0xb0/0xb0
  ? __bpf_trace_rcu_stall_warning+0x10/0x10
  perf_pmu_migrate_context+0x121/0x370
  iommu_pmu_cpu_offline+0x6a/0xa0
  ? iommu_pmu_del+0x1e0/0x1e0
  cpuhp_invoke_callback+0x129/0x510
  cpuhp_thread_fun+0x94/0x150
  smpboot_thread_fn+0x183/0x220
  ? sort_range+0x20/0x20
  kthread+0xe6/0x110
  ? kthread_complete_and_exit+0x20/0x20
  ret_from_fork+0x1f/0x30
  </TASK>
 ---[ end trace 0000000000000000 ]---

The synchronize_rcu() will be invoked in the perf_pmu_migrate_context(),
when migrating a PMU to a new CPU. However, the current for_each_iommu()
is within RCU read-side critical section.

Two methods were considered to fix the issue.
- Use the dmar_global_lock to replace the RCU read lock when going
  through the drhd list. But it triggers a lockdep warning.
- Use the cpuhp_setup_state_multi() to set up a dedicated state for each
  IOMMU PMU. The lock can be avoided.

The latter method is implemented in this patch. Since each IOMMU PMU has
a dedicated state, add cpuhp_node and cpu in struct iommu_pmu to track
the state. The state can be dynamically allocated now. Remove the
CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE.

Fixes: 46284c6ceb5e ("iommu/vt-d: Support cpumask for IOMMU perfmon")
Reported-by: Ammy Yi <ammy.yi@intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lore.kernel.org/r/20230328182028.1366416-1-kan.liang@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20230329134721.469447-4-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
drivers/iommu/intel/iommu.h
drivers/iommu/intel/perfmon.c
include/linux/cpuhotplug.h

index d6df3b8..694ab9b 100644 (file)
@@ -641,6 +641,8 @@ struct iommu_pmu {
        DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
        struct perf_event       *event_list[IOMMU_PMU_IDX_MAX];
        unsigned char           irq_name[16];
+       struct hlist_node       cpuhp_node;
+       int                     cpu;
 };
 
 #define IOMMU_IRQ_ID_OFFSET_PRQ                (DMAR_UNITS_SUPPORTED)
index e17d974..cf43e79 100644 (file)
@@ -773,19 +773,34 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
        iommu->perf_irq = 0;
 }
 
-static int iommu_pmu_cpu_online(unsigned int cpu)
+static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
+       struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+
        if (cpumask_empty(&iommu_pmu_cpu_mask))
                cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
 
+       if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
+               iommu_pmu->cpu = cpu;
+
        return 0;
 }
 
-static int iommu_pmu_cpu_offline(unsigned int cpu)
+static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
-       struct dmar_drhd_unit *drhd;
-       struct intel_iommu *iommu;
-       int target;
+       struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+       int target = cpumask_first(&iommu_pmu_cpu_mask);
+
+       /*
+        * The iommu_pmu_cpu_mask has been updated when offline the CPU
+        * for the first iommu_pmu. Migrate the other iommu_pmu to the
+        * new target.
+        */
+       if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
+               perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+               iommu_pmu->cpu = target;
+               return 0;
+       }
 
        if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
                return 0;
@@ -795,45 +810,50 @@ static int iommu_pmu_cpu_offline(unsigned int cpu)
        if (target < nr_cpu_ids)
                cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
        else
-               target = -1;
+               return 0;
 
-       rcu_read_lock();
-
-       for_each_iommu(iommu, drhd) {
-               if (!iommu->pmu)
-                       continue;
-               perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
-       }
-       rcu_read_unlock();
+       perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+       iommu_pmu->cpu = target;
 
        return 0;
 }
 
 static int nr_iommu_pmu;
+static enum cpuhp_state iommu_cpuhp_slot;
 
 static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
 {
        int ret;
 
-       if (nr_iommu_pmu++)
-               return 0;
+       if (!nr_iommu_pmu) {
+               ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+                                             "driver/iommu/intel/perfmon:online",
+                                             iommu_pmu_cpu_online,
+                                             iommu_pmu_cpu_offline);
+               if (ret < 0)
+                       return ret;
+               iommu_cpuhp_slot = ret;
+       }
 
-       ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
-                               "driver/iommu/intel/perfmon:online",
-                               iommu_pmu_cpu_online,
-                               iommu_pmu_cpu_offline);
-       if (ret)
-               nr_iommu_pmu = 0;
+       ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+       if (ret) {
+               if (!nr_iommu_pmu)
+                       cpuhp_remove_multi_state(iommu_cpuhp_slot);
+               return ret;
+       }
+       nr_iommu_pmu++;
 
-       return ret;
+       return 0;
 }
 
 static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
 {
+       cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+
        if (--nr_iommu_pmu)
                return;
 
-       cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
+       cpuhp_remove_multi_state(iommu_cpuhp_slot);
 }
 
 void iommu_pmu_register(struct intel_iommu *iommu)
index c6fab00..5b2f814 100644 (file)
@@ -218,7 +218,6 @@ enum cpuhp_state {
        CPUHP_AP_PERF_X86_CQM_ONLINE,
        CPUHP_AP_PERF_X86_CSTATE_ONLINE,
        CPUHP_AP_PERF_X86_IDXD_ONLINE,
-       CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
        CPUHP_AP_PERF_S390_CF_ONLINE,
        CPUHP_AP_PERF_S390_SF_ONLINE,
        CPUHP_AP_PERF_ARM_CCI_ONLINE,