iommu/vt-d: Support cpumask for IOMMU perfmon
authorKan Liang <kan.liang@linux.intel.com>
Tue, 31 Jan 2023 07:37:38 +0000 (15:37 +0800)
committerJoerg Roedel <jroedel@suse.de>
Fri, 3 Feb 2023 10:06:08 +0000 (11:06 +0100)
The perf subsystem assumes that all counters are by default per-CPU. So
the user space tool reads a counter from each CPU. However, the IOMMU
counters are system-wide and can be read from any CPU. Here we use a CPU
mask to restrict counting to one CPU to handle the issue, with a CPU
hotplug notifier that chooses a different CPU if the chosen one is taken
off-line.

The chosen CPU is exposed via /sys/bus/event_source/devices/dmar*/cpumask
for the user-space perf tool.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lore.kernel.org/r/20230128200428.1459118-6-kan.liang@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
drivers/iommu/intel/perfmon.c
include/linux/cpuhotplug.h

index 988210a..d7af491 100644 (file)
@@ -27,3 +27,11 @@ Description: Read-only.  Attribute group to describe the magic bits
                    filter_pasid        = "config2:0-21"  - PASID filter
                    filter_ats          = "config2:24-28" - Address Type filter
                    filter_page_table   = "config2:32-36" - Page Table Level filter
+
+What:          /sys/bus/event_source/devices/dmar*/cpumask
+Date:          Jan 2023
+KernelVersion: 6.3
+Contact:       Kan Liang <kan.liang@linux.intel.com>
+Description:   Read-only. This file always returns the CPU to which the
+               IOMMU pmu is bound for access to all IOMMU pmu performance
+               monitoring events.
index df9b787..322d362 100644 (file)
@@ -34,9 +34,28 @@ static struct attribute_group iommu_pmu_events_attr_group = {
        .attrs = attrs_empty,
 };
 
+/*
+ * The single CPU on which all IOMMU PMU counters are read.  The counters
+ * are system-wide, so counting is restricted to one CPU; the mask holds
+ * at most one bit and is exposed via the sysfs "cpumask" attribute.
+ */
+static cpumask_t iommu_pmu_cpu_mask;
+
+/* sysfs show handler: print the active CPU mask into @buf. */
+static ssize_t
+cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *iommu_pmu_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL
+};
+
+/* Attribute group backing /sys/bus/event_source/devices/dmar*/cpumask. */
+static struct attribute_group iommu_pmu_cpumask_attr_group = {
+       .attrs = iommu_pmu_cpumask_attrs,
+};
+
 static const struct attribute_group *iommu_pmu_attr_groups[] = {
        &iommu_pmu_format_attr_group,
        &iommu_pmu_events_attr_group,
+       &iommu_pmu_cpumask_attr_group,
        NULL
 };
 
@@ -679,20 +698,98 @@ void free_iommu_pmu(struct intel_iommu *iommu)
        iommu->pmu = NULL;
 }
 
+/*
+ * CPU hotplug online callback: if no CPU currently owns the IOMMU
+ * counters (mask empty), make the newly-onlined CPU the active one.
+ * Otherwise leave the existing choice untouched.
+ */
+static int iommu_pmu_cpu_online(unsigned int cpu)
+{
+       if (cpumask_empty(&iommu_pmu_cpu_mask))
+               cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
+
+       return 0;
+}
+
+/*
+ * CPU hotplug offline callback: if the departing @cpu is the active
+ * counting CPU, pick any other online CPU as the new target and migrate
+ * every IOMMU PMU's perf context to it.  If no other CPU is online,
+ * target stays -1 and no CPU owns the counters until the next online
+ * callback.
+ */
+static int iommu_pmu_cpu_offline(unsigned int cpu)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       int target;
+
+       /* Nothing to do unless @cpu was the active counting CPU. */
+       if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
+               return 0;
+
+       target = cpumask_any_but(cpu_online_mask, cpu);
+
+       if (target < nr_cpu_ids)
+               cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
+       else
+               target = -1;
+
+       /*
+        * NOTE(review): rcu_read_lock() protects the for_each_iommu()
+        * walk of the RCU-managed drhd list, but perf_pmu_migrate_context()
+        * takes mutexes and may sleep, which is not permitted inside an
+        * RCU read-side critical section.  Confirm; a cpuhp multi-instance
+        * state per PMU (avoiding the global iommu walk here) would
+        * sidestep the problem.
+        */
+       rcu_read_lock();
+
+       for_each_iommu(iommu, drhd) {
+               if (!iommu->pmu)
+                       continue;
+               perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
+       }
+       rcu_read_unlock();
+
+       return 0;
+}
+
+/*
+ * Count of registered IOMMU PMUs.  The cpuhp state below is shared by
+ * all of them: installed on the first registration, removed on the last.
+ * No locking visible here -- presumably registration is serialized by
+ * the caller (IOMMU probe path); verify before adding concurrent callers.
+ */
+static int nr_iommu_pmu;
+
+/*
+ * Install the shared CPU hotplug callbacks on first PMU registration.
+ * @iommu_pmu is currently unused; the hotplug state is global.
+ * Returns 0 on success or when callbacks are already installed.
+ */
+static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
+{
+       int ret;
+
+       /* Only the first PMU installs the state. */
+       if (nr_iommu_pmu++)
+               return 0;
+
+       ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
+                               "driver/iommu/intel/perfmon:online",
+                               iommu_pmu_cpu_online,
+                               iommu_pmu_cpu_offline);
+       if (ret)
+               nr_iommu_pmu = 0;
+
+       return ret;
+}
+
+/* Drop the shared hotplug callbacks when the last PMU is unregistered. */
+static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
+{
+       if (--nr_iommu_pmu)
+               return;
+
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
+}
+
+/*
+ * Register the IOMMU PMU with the perf core and, for the first PMU,
+ * install the shared CPU hotplug callbacks.  On any failure the PMU
+ * resources are released via free_iommu_pmu() and an error is logged;
+ * registration failure is not fatal to the IOMMU itself.
+ */
 void iommu_pmu_register(struct intel_iommu *iommu)
 {
-       if (!iommu->pmu)
+       struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+       if (!iommu_pmu)
                return;
 
-       if (__iommu_pmu_register(iommu)) {
-               pr_err("Failed to register PMU for iommu (seq_id = %d)\n",
-                      iommu->seq_id);
-               free_iommu_pmu(iommu);
-       }
+       if (__iommu_pmu_register(iommu))
+               goto err;
+
+       /* First successful registration installs the hotplug callbacks. */
+       if (iommu_pmu_cpuhp_setup(iommu_pmu))
+               goto unregister;
+
+       return;
+
+unregister:
+       perf_pmu_unregister(&iommu_pmu->pmu);
+err:
+       pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
+       free_iommu_pmu(iommu);
 }
 
+/*
+ * Tear down the IOMMU PMU registration: drop the shared hotplug
+ * callbacks (removed entirely when this is the last PMU) and
+ * unregister from the perf core.  No-op if no PMU was set up.
+ */
 void iommu_pmu_unregister(struct intel_iommu *iommu)
 {
-       if (iommu->pmu)
-               perf_pmu_unregister(&iommu->pmu->pmu);
+       struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+       if (!iommu_pmu)
+               return;
+
+       iommu_pmu_cpuhp_free(iommu_pmu);
+       perf_pmu_unregister(&iommu_pmu->pmu);
 }
index 6c6859b..f2ea348 100644 (file)
@@ -221,6 +221,7 @@ enum cpuhp_state {
        CPUHP_AP_PERF_X86_CQM_ONLINE,
        CPUHP_AP_PERF_X86_CSTATE_ONLINE,
        CPUHP_AP_PERF_X86_IDXD_ONLINE,
+       CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
        CPUHP_AP_PERF_S390_CF_ONLINE,
        CPUHP_AP_PERF_S390_SF_ONLINE,
        CPUHP_AP_PERF_ARM_CCI_ONLINE,