perf/x86/intel/uncore: Fix multi-segment problem of perf_event_intel_uncore
authorTaku Izumi <izumi.taku@jp.fujitsu.com>
Thu, 24 Sep 2015 12:10:21 +0000 (21:10 +0900)
committerIngo Molnar <mingo@kernel.org>
Tue, 6 Oct 2015 15:31:51 +0000 (17:31 +0200)
In multi-segment system, uncore devices may belong to buses whose segment
number is other than 0:

  ....
  0000:ff:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03)
  ...
  0001:7f:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03)
  ...
  0001:bf:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03)
  ...
  0001:ff:10.5 System peripheral: Intel Corporation Xeon E5 v3/Core i7 Scratchpad & Semaphore Registers (rev 03
  ...

In that case, relation of bus number and physical id may be broken
because "uncore_pcibus_to_physid" doesn't take account of PCI segment.
For example, bus 0000:ff and 0001:ff uses the same entry of
"uncore_pcibus_to_physid" array.

This patch fixes this problem by introducing the segment-aware pci2phy_map instead.

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/1443096621-4119-1-git-send-email-izumi.taku@jp.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/cpu/perf_event_intel_uncore.h
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c

index 560e525..61215a6 100644 (file)
@@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
 static bool pcidrv_registered;
 struct pci_driver *uncore_pci_driver;
 /* pci bus to socket mapping */
-int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
 
 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
@@ -20,6 +21,59 @@ static struct event_constraint uncore_constraint_fixed =
 struct event_constraint uncore_constraint_empty =
        EVENT_CONSTRAINT(0, 0, 0);
 
+int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+       struct pci2phy_map *map;
+       int phys_id = -1;
+
+       raw_spin_lock(&pci2phy_map_lock);
+       list_for_each_entry(map, &pci2phy_map_head, list) {
+               if (map->segment == pci_domain_nr(bus)) {
+                       phys_id = map->pbus_to_physid[bus->number];
+                       break;
+               }
+       }
+       raw_spin_unlock(&pci2phy_map_lock);
+
+       return phys_id;
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+       struct pci2phy_map *map, *alloc = NULL;
+       int i;
+
+       lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+       list_for_each_entry(map, &pci2phy_map_head, list) {
+               if (map->segment == segment)
+                       goto end;
+       }
+
+       if (!alloc) {
+               raw_spin_unlock(&pci2phy_map_lock);
+               alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+               raw_spin_lock(&pci2phy_map_lock);
+
+               if (!alloc)
+                       return NULL;
+
+               goto lookup;
+       }
+
+       map = alloc;
+       alloc = NULL;
+       map->segment = segment;
+       for (i = 0; i < 256; i++)
+               map->pbus_to_physid[i] = -1;
+       list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+       kfree(alloc);
+       return map;
+}
+
 ssize_t uncore_event_show(struct kobject *kobj,
                          struct kobj_attribute *attr, char *buf)
 {
@@ -809,7 +863,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
        int phys_id;
        bool first_box = false;
 
-       phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+       phys_id = uncore_pcibus_to_physid(pdev->bus);
        if (phys_id < 0)
                return -ENODEV;
 
@@ -856,9 +910,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 {
        struct intel_uncore_box *box = pci_get_drvdata(pdev);
        struct intel_uncore_pmu *pmu;
-       int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+       int i, cpu, phys_id;
        bool last_box = false;
 
+       phys_id = uncore_pcibus_to_physid(pdev->bus);
        box = pci_get_drvdata(pdev);
        if (!box) {
                for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
index 72c54c2..2f0a4a9 100644 (file)
@@ -117,6 +117,15 @@ struct uncore_event_desc {
        const char *config;
 };
 
+struct pci2phy_map {
+       struct list_head list;
+       int segment;
+       int pbus_to_physid[256];
+};
+
+int uncore_pcibus_to_physid(struct pci_bus *bus);
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
 ssize_t uncore_event_show(struct kobject *kobj,
                          struct kobj_attribute *attr, char *buf);
 
@@ -317,7 +326,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
 extern struct intel_uncore_type **uncore_msr_uncores;
 extern struct intel_uncore_type **uncore_pci_uncores;
 extern struct pci_driver *uncore_pci_driver;
-extern int uncore_pcibus_to_physid[256];
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
 extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
 extern struct event_constraint uncore_constraint_empty;
 
index f78574b..8452561 100644 (file)
@@ -420,15 +420,25 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 static int snb_pci2phy_map_init(int devid)
 {
        struct pci_dev *dev = NULL;
-       int bus;
+       struct pci2phy_map *map;
+       int bus, segment;
 
        dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
        if (!dev)
                return -ENOTTY;
 
        bus = dev->bus->number;
-
-       uncore_pcibus_to_physid[bus] = 0;
+       segment = pci_domain_nr(dev->bus);
+
+       raw_spin_lock(&pci2phy_map_lock);
+       map = __find_pci2phy_map(segment);
+       if (!map) {
+               raw_spin_unlock(&pci2phy_map_lock);
+               pci_dev_put(dev);
+               return -ENOMEM;
+       }
+       map->pbus_to_physid[bus] = 0;
+       raw_spin_unlock(&pci2phy_map_lock);
 
        pci_dev_put(dev);
 
index 8cbb3f3..f0f4fcb 100644 (file)
@@ -1087,7 +1087,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
 static int snbep_pci2phy_map_init(int devid)
 {
        struct pci_dev *ubox_dev = NULL;
-       int i, bus, nodeid;
+       int i, bus, nodeid, segment;
+       struct pci2phy_map *map;
        int err = 0;
        u32 config = 0;
 
@@ -1106,16 +1107,27 @@ static int snbep_pci2phy_map_init(int devid)
                err = pci_read_config_dword(ubox_dev, 0x54, &config);
                if (err)
                        break;
+
+               segment = pci_domain_nr(ubox_dev->bus);
+               raw_spin_lock(&pci2phy_map_lock);
+               map = __find_pci2phy_map(segment);
+               if (!map) {
+                       raw_spin_unlock(&pci2phy_map_lock);
+                       err = -ENOMEM;
+                       break;
+               }
+
                /*
                 * every three bits in the Node ID mapping register maps
                 * to a particular node.
                 */
                for (i = 0; i < 8; i++) {
                        if (nodeid == ((config >> (3 * i)) & 0x7)) {
-                               uncore_pcibus_to_physid[bus] = i;
+                               map->pbus_to_physid[bus] = i;
                                break;
                        }
                }
+               raw_spin_unlock(&pci2phy_map_lock);
        }
 
        if (!err) {
@@ -1123,13 +1135,17 @@ static int snbep_pci2phy_map_init(int devid)
                 * For PCI bus with no UBOX device, find the next bus
                 * that has UBOX device and use its mapping.
                 */
-               i = -1;
-               for (bus = 255; bus >= 0; bus--) {
-                       if (uncore_pcibus_to_physid[bus] >= 0)
-                               i = uncore_pcibus_to_physid[bus];
-                       else
-                               uncore_pcibus_to_physid[bus] = i;
+               raw_spin_lock(&pci2phy_map_lock);
+               list_for_each_entry(map, &pci2phy_map_head, list) {
+                       i = -1;
+                       for (bus = 255; bus >= 0; bus--) {
+                               if (map->pbus_to_physid[bus] >= 0)
+                                       i = map->pbus_to_physid[bus];
+                               else
+                                       map->pbus_to_physid[bus] = i;
+                       }
                }
+               raw_spin_unlock(&pci2phy_map_lock);
        }
 
        pci_dev_put(ubox_dev);